def cli(ctx, endpoint, admin):
    # Prepare the CLI context and, unless the ``configure`` subcommand was
    # invoked, open the Panoptes connection.
    #
    # ctx      -- context object; ``config_dir``, ``config_file`` and
    #             ``config`` are set on it, ``invoked_subcommand`` is read.
    # endpoint -- optional endpoint URL that overrides the configured one.
    # admin    -- forwarded unchanged to ``Panoptes.connect``.
    ctx.config_dir = os.path.expanduser('~/.panoptes/')
    ctx.config_file = os.path.join(ctx.config_dir, 'config.yml')
    ctx.config = {
        'endpoint': 'https://www.zooniverse.org',
        'username': '',
        'password': '',
    }

    try:
        with open(ctx.config_file) as conf_f:
            # An empty YAML document loads as None; fall back to an empty
            # mapping so dict.update() does not raise TypeError.
            ctx.config.update(yaml.full_load(conf_f) or {})
    except IOError:
        # A missing or unreadable config file simply means "use defaults".
        pass

    if endpoint:
        ctx.config['endpoint'] = endpoint

    # ``configure`` must work before any credentials exist, so it is the one
    # subcommand that skips the connection.
    if ctx.invoked_subcommand != 'configure':
        Panoptes.connect(
            endpoint=ctx.config['endpoint'],
            username=ctx.config['username'],
            password=ctx.config['password'],
            admin=admin,
        )
def __connect(self):
    """
    Log in to Panoptes with this object's credentials and look up its project.

    :return: whatever ``Project.find`` returns for ``self.project_id``
    """
    credentials = {'username': self.username, 'password': self.password}
    Panoptes.connect(**credentials)
    project = Project.find(self.project_id)
    return project
def add_to_subject_set(subject_set_id, subject_set_file, username=None,
                       password=None):
    """
    Import a 1 column file of subject_ids to a subject_set.

    The file is always read (so a bad path fails early), but nothing is sent
    to Panoptes unless ``username`` is given.

    Parameters
    ----------
    subject_set_id : str
        subject set ID linked to the web interface

    subject_set_file : str
        one-column file of subject IDs (output of cull_subject_ids)

    username, password : str, str
        if passed, will add subject set ids to the subject set on the web.
    """
    with open(subject_set_file) as subject_ids:
        stripped = (line.strip() for line in subject_ids)
        # Skip blank lines so an empty string is never submitted as an ID;
        # de-duplicate and sort so the request is deterministic.
        unique_ids = sorted({line for line in stripped if line})

    if username is None:
        return

    try:
        from panoptes_client import Panoptes, SubjectSet
    except ImportError:
        print(
            'Install https://github.com/zooniverse/panoptes-python-client')
        sys.exit(1)

    Panoptes.connect(username=username, password=password)
    subject_set = SubjectSet.find(subject_set_id)
    # A plain list of ID strings, rather than a numpy array built from a
    # nested list.
    subject_set.add(unique_ids)
    return
def find_duplicates():
    """Report subjects of 'tingard/galaxy-builder' that share an SDSS DR7 id.

    Connects to Panoptes (password prompted via ``getpass``), collects every
    subject of every subject set linked to the project, pairs each subject id
    with its 'SDSS dr7 id' metadata value and prints summary counts.

    Returns:
        ``groups``: an array built from one row per DR7 id that occurs for
        more than one subject; each row is the DR7 id followed by the
        subject ids carrying it.
    """
    Panoptes.connect(username='******', password=getpass())
    gzb_project = Project.find(slug='tingard/galaxy-builder')
    # One list of subjects per linked subject set.
    subject_sets = []
    for set in gzb_project.links.subject_sets:
        subject_sets.append(list(set.subjects))
    # Flattened list of all subjects across all sets.
    subjects = [j for i in subject_sets for j in i]
    # Subject ids per set, kept in the same order as the project's sets.
    subject_set_ids = [[np.int64(j.id) for j in i] for i in subject_sets]
    ids = [int(i.id) for i in subjects]
    # Subjects without the metadata key get False, i.e. 0 after conversion.
    dr7objids = [np.int64(i.metadata.get('SDSS dr7 id', False)) for i in subjects]
    pairings = sorted(zip(ids, dr7objids), key=lambda i: i[0])
    df = pd.DataFrame(pairings, columns=('subject_id', 'dr7objid'))
    # Drop rows with no DR7 id (0) and collapse to one row per subject id.
    df = df[df['dr7objid'] != 0].groupby('subject_id').max()
    n_sids = len(df)
    n_dr7ids = len(df.groupby('dr7objid'))
    print('{} unique subject ids'.format(n_sids))
    print('{} unique dr7 object ids'.format(n_dr7ids))
    print('{} duplicate galaxies'.format(n_sids - n_dr7ids))
    # Row layout: [dr7objid, subject_id, subject_id, ...] for every DR7 id
    # shared by more than one subject.
    # NOTE(review): the column slicing below presumably relies on every group
    # having the same number of subjects (a rectangular array) - confirm.
    groups = np.array([np.concatenate(([i[0]], i[1].index.values))
                       for i in df.groupby('dr7objid') if len(i[1]) > 1])
    # okay, what subject sets are our duplicates?
    # s1 / s2: first subject set whose ids are all contained in column 1 /
    # column 2 of ``groups`` (argmax returns index 0 if none qualifies).
    s1 = gzb_project.links.subject_sets[
        np.argmax([np.all(np.isin(subject_set_ids[i], groups[:, 1]))
                   for i in range(len(subject_set_ids))])
    ]
    s2 = gzb_project.links.subject_sets[
        np.argmax([np.all(np.isin(subject_set_ids[i], groups[:, 2]))
                   for i in range(len(subject_set_ids))])
    ]
    print(s1, s2)
    return groups
def token():
    """
    Returns the current oauth token and its expiration date.
    """
    # The token is fetched and echoed first; the expiry is read only
    # afterwards, from a fresh client lookup.
    token_line = "Token: {}".format(Panoptes.client().get_bearer_token())
    click.echo(token_line)
    expiry_line = "Expiry time: {}".format(Panoptes.client().bearer_expires)
    click.echo(expiry_line)
def make_tutorial_images(imagePaths, ellipseData, projectData):
    """Create one Panoptes subject per image and add them to a subject set.

    Args:
        imagePaths: iterable of image paths; the position of each path is
            used as the group key into ``ellipseData``.
        ellipseData: object exposing ``get_group(index)`` whose result
            supports ``reset_index(drop=True)`` (presumably a pandas
            GroupBy - confirm against the caller).
        projectData: mapping with the keys ``"user_name"``, ``"password"``
            and ``"subject_set"``.

    Returns:
        None. If the subject set cannot be found the API error is printed
        and nothing is uploaded.
    """
    # Connect to Panoptes
    Panoptes.connect(
        username=projectData["user_name"], password=projectData["password"]
    )

    # Look the subject set up once, before creating anything: a failed lookup
    # can then no longer leave already-saved subjects unlinked, and an empty
    # ``imagePaths`` no longer reaches ``subjectSet.add`` with the name unbound.
    try:
        subjectSet = SubjectSet.find(projectData["subject_set"])
    except PanoptesAPIException as e:
        print(e)
        return

    newSubjects = []
    for imageId, imagePath in enumerate(imagePaths):
        print(f"Adding {imagePath}...")
        newSubject = Subject()
        newSubject.add_location(imagePath)
        newSubject.links.project = subjectSet.links.project
        newSubject.metadata.update(
            make_metadata(
                ellipseData.get_group(imageId).reset_index(drop=True), imagePath
            )
        )
        newSubject.save()
        newSubjects.append(newSubject)

    # Nothing to link when no images were supplied.
    if newSubjects:
        subjectSet.add(newSubjects)
def upload_manifest_to_galaxy_zoo(subject_set_name, manifest, galaxy_zoo_id='5733', n_processes=10):
    """
    Save manifest (set of galaxies with metadata prepared) to Galaxy Zoo

    Args:
        subject_set_name (str): name for subject set. If it contains 'TEST',
            nothing is uploaded.
        manifest (list): containing dicts of form {png_loc: img.png, key_data: {metadata_col: metadata_value}}
        galaxy_zoo_id (str): panoptes project id e.g. '5733' for Galaxy Zoo, '6490' for mobile
        n_processes (int): number of worker threads with which to upload galaxies in parallel

    Returns:
        list: the ``manifest`` argument, unchanged (for debugging only)
    """
    if 'TEST' in subject_set_name:
        logging.warning('Testing mode detected - not uploading!')
        return manifest

    if galaxy_zoo_id == '5733':
        logging.info('Uploading to Galaxy Zoo project 5733')
    elif galaxy_zoo_id == '6490':
        logging.info('Uploading to mobile app project 6490')
    else:
        logging.info('Uploading to unknown project %s', galaxy_zoo_id)

    # Important - don't commit the password!
    zooniverse_login = read_data_from_txt(zooniverse_login_loc)
    Panoptes.connect(**zooniverse_login)

    galaxy_zoo = Project.find(galaxy_zoo_id)

    subject_set = SubjectSet()
    subject_set.links.project = galaxy_zoo
    subject_set.display_name = subject_set_name
    subject_set.save()

    pbar = tqdm(total=len(manifest), unit=' subjects uploaded')
    save_subject_partial = functools.partial(
        save_subject, project=galaxy_zoo, pbar=pbar)
    pool = ThreadPool(n_processes)
    try:
        new_subjects = pool.map(save_subject_partial, manifest)
    finally:
        # Release the progress bar and the worker threads even when one of
        # the uploads raises.
        pbar.close()
        pool.close()
        pool.join()

    subject_set.add(new_subjects)

    return manifest  # for debugging only
def get_panoptes_auth_token():
    """Log in with the stored Zooniverse credentials and return a bearer token."""
    # This token is only valid for ~2 hours. Don't use for long-running downloads.
    # Here, we only need a few calls to get the workflow versions
    # Will ask the devs to expose this nicely with the already-built expiry check.
    login_path = api_to_json.ZOONIVERSE_LOGIN_LOC  # beware sneaky shared global state
    with open(login_path, 'r') as login_file:
        credentials = json.load(login_file)
    Panoptes.connect(**credentials)
    # Reaches into the client's private thread-local storage.
    active_client = Panoptes._local.panoptes_client
    return active_client.get_bearer_token()
def get_authenticated_panoptes(bearer_token, bearer_expiry):
    """Build a Panoptes client that is pre-loaded with an existing token.

    ``bearer_expiry`` is a string in ``"%Y-%m-%d %H:%M:%S.%f"`` format; it is
    parsed and stored on the client as ``bearer_expires``.
    """
    client = Panoptes(endpoint=PanoptesUtils.base_url())
    client.bearer_token = bearer_token
    client.logged_in = True
    expires_at = datetime.strptime(bearer_expiry, "%Y-%m-%d %H:%M:%S.%f")
    client.bearer_expires = expires_at
    return client
def _retrieve_user(user_id):
    """Return the user for ``user_id``, consulting the ``users`` cache first."""
    if user_id in users:
        return users[user_id]

    # Cache miss: connect with the client credentials from the environment
    # and remember the result for later calls.
    Panoptes.connect(
        endpoint=getenv('PANOPTES_URL', 'https://panoptes.zooniverse.org/'),
        client_id=getenv('PANOPTES_CLIENT_ID'),
        client_secret=getenv('PANOPTES_CLIENT_SECRET'),
    )
    fetched = User.find(user_id)
    users[user_id] = fetched
    return fetched
def main(production=False):
    """Create a timestamped test subject set and upload the local subjects.

    Subject files are read from the ``subjects`` directory next to this file
    and are named ``image_<n>.png``, ``difference_<n>.json``,
    ``model_<n>.json`` and ``metadata_<n>.json``.

    Args:
        production: not read by the visible code; the project id is fixed
            (the conditional that used it is commented out).
    """
    # NOTE(review): the next statement is not valid Python as it stands; the
    # username/password prompt and the Panoptes.connect(...) call appear to
    # have been redacted into a single run of asterisks. Restore the original
    # statements before running.
    uname = input('Enter your username: '******'https://panoptes-staging.zooniverse.org', admin=True )
    pId = 5733  # if production else 1820
    project = Project.find(pId)
    subject_set = SubjectSet()
    subject_set.links.project = project
    # The epoch timestamp gives each run its own display name.
    subject_set.display_name = 'Test_subject_set_' + str(int(time.time()))
    subject_set.save()
    loc = os.path.abspath(os.path.dirname(__file__))
    subjects = os.listdir(loc + '/subjects')
    # For each file kind, the sorted numeric ids found in the directory.
    # NOTE(review): the target names and the kind tuple are in different
    # orders ('difference' is bound to ``images``, 'image' to
    # ``differences``); harmless only as long as all four lists are equal.
    images, differences, model, metadata = [
        sorted((
            int(re.match(r'{}_([0-9]+)\.(?:json|png)$'.format(s), i).group(1))
            for i in subjects
            if re.match(r'{}_([0-9]+)\.(?:json|png)$'.format(s), i)
        ))
        for s in ('difference', 'image', 'model', 'metadata')
    ]
    # Only warns: the upload below still runs when the id lists differ.
    if not images == differences == model == metadata:
        print(
            'Images, differences, model and metadata ' +
            'must all have same length'
        )
    # TODO: change subject directory structure to be more efficient
    # (not having 12,000+ files in a folder...)
    for i in images:
        # ``metadata`` is re-bound here to the parsed JSON for subject ``i``;
        # an unreadable metadata file falls back to an empty dict.
        try:
            with open('{}/subjects/metadata_{}.json'.format(loc, i)) as f:
                metadata = json.load(f)
        except IOError:
            metadata = {}
        # One subject per call; the helper's return value replaces
        # ``subject_set`` for the next iteration.
        subject_set = uploadSubjectToSet(
            project, subject_set,
            [[j.format(loc, i) for j in (
                '{}/subjects/image_{}.png',
                '{}/subjects/difference_{}.json',
                '{}/subjects/model_{}.json'
            )]],  # locations
            [metadata],
        )
def create_subject_set(folder_name, set_name='test_subject_set'):
    """Upload every ``image_<name>.png`` subject in a folder as a new subject set.

    For each ``image_<name>.png`` in ``folder_name`` the matching
    ``difference_<name>.json``, ``model_<name>.json`` and
    ``metadata_<name>.json`` must also exist. The user is prompted for
    Panoptes credentials, a subject set called ``set_name`` is created and
    all subjects are handed to ``uploadSubjectToSet`` in one call.

    Args:
        folder_name: directory containing the subject files.
        set_name: display name of the subject set to create.

    Raises:
        FileNotFoundError: if any companion file is missing; raised before
            any prompt or network access.
    """
    # The dot is escaped and the pattern anchored so that only real
    # ``image_<name>.png`` files are treated as subjects.
    image_pattern = re.compile(r'image_(.*?)\.png$')
    subject_names = [
        found.group(1)
        for found in (image_pattern.match(f) for f in os.listdir(folder_name))
        if found is not None
    ]
    files = [
        [
            join(folder_name, file_name)
            for file_name in (
                'image_{}.png'.format(subject_name),
                'difference_{}.json'.format(subject_name),
                'model_{}.json'.format(subject_name),
                'metadata_{}.json'.format(subject_name),
            )
        ]
        for subject_name in subject_names
    ]
    # Explicit check instead of ``assert`` so it also runs under ``python -O``.
    missing = [p for group in files for p in group if not os.path.exists(p)]
    if missing:
        raise FileNotFoundError('Missing files! ' + ', '.join(missing))

    uname = input('Enter your username: ')
    pwd = getpass.getpass()
    Panoptes.connect(
        username=uname,
        password=pwd,
        admin=True
    )
    pId = 5590
    project = Project.find(pId)
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = set_name
    subject_set.save()

    metadata_list = []
    for fs in files:
        # fs[3] is the metadata JSON; an unreadable file means no metadata.
        try:
            with open(fs[3]) as metaF:
                metadata = json.load(metaF)
        except IOError:
            metadata = {}
        metadata_list.append(metadata)

    # Only the first three files of each subject are upload locations.
    subject_set = uploadSubjectToSet(
        project, subject_set,
        [i[:3] for i in files],
        metadata_list,
    )
def info(user_id, email, login):
    """
    Displays information about a user. Defaults to the current user if no ID
    or search criteria are given.
    """
    # The three selectors are mutually exclusive.
    selectors_given = sum(1 for value in (user_id, email, login) if value)
    if selectors_given > 1:
        click.echo(
            'Error: At most only one of user ID, login, or email may be '
            'specified.',
            err=True,
        )
        return -1

    if user_id:
        user = User.find(user_id)
    else:
        # Search by email when given, otherwise by login (falling back to
        # the login of the currently connected client).
        if email:
            field, wanted = 'email', email
        else:
            field = 'login'
            wanted = login if login else Panoptes.client().username
        try:
            user = next(User.where(**{field: wanted}))
        except StopIteration:
            user = None
        # Only an exact match on the searched field counts as found.
        if getattr(user, field, '') != wanted:
            click.echo('User not found', err=True)
            return -1

    click.echo(yaml.dump(user.raw))
def _retrieve_user(user_id):
    """Return the user for ``user_id``, consulting the ``users`` cache first.

    When Panoptes raises ``PanoptesAPIException`` for the lookup, a
    ``CantFindUser`` placeholder is cached and returned instead.
    """
    if user_id in users:
        return users[user_id]

    Panoptes.connect(
        endpoint=getenv('PANOPTES_URL', 'https://panoptes.zooniverse.org/'),
        client_id=getenv('PANOPTES_CLIENT_ID'),
        client_secret=getenv('PANOPTES_CLIENT_SECRET'),
    )
    try:
        resolved = User.find(user_id)
    except PanoptesAPIException:
        # some users are not found in panoptes
        # return an empty class with an `id` attribute
        resolved = CantFindUser(user_id)
    users[user_id] = resolved
    return resolved
def panoptes_connect():
    """Connect to Panoptes using the credentials in ``panoptesuserfile.txt``."""
    # file with username and password on the first line with a space in between
    panoptesuserfile = 'panoptesuserfile.txt'
    with open(panoptesuserfile) as fp:
        first_line = fp.readline()
        fields = first_line.strip().split()
        return Panoptes.connect(username=fields[0], password=fields[1])
def get_user_details(self, response):
    """Turn an OAuth token response into the user's details.

    A Panoptes client is primed with the tokens from ``response`` and used to
    fetch ``/me``. Admins get the single marker ``'admin user'`` as their
    project list; everyone else gets the hrefs of the projects returned for
    the ``collaborator`` role.
    """
    client = Panoptes(
        endpoint=PanoptesUtils.base_url(),
        client_id=PanoptesUtils.client_id(),
        client_secret=PanoptesUtils.client_secret(),
    )
    client.bearer_token = response['access_token']
    client.logged_in = True
    client.refresh_token = response['refresh_token']
    token_lifetime = timedelta(seconds=response['expires_in'])
    client.bearer_expires = datetime.now() + token_lifetime

    with client:
        user = client.get('/me')[0]['users'][0]
        if user['admin']:
            ids = ['admin user']
        else:
            ids = [
                project.href
                for project in Project.where(current_user_roles='collaborator')
            ]

        details = {
            'username': user['login'],
            'email': user['email'],
            'is_superuser': user['admin'],
            'projects': ids,
        }
        return details
def upload_images(id, use_database=True):
    """Create a subject set named ``id`` and upload its images to Panoptes.

    The manifests (``*-manifest.txt``) are read from the directory
    ``target + id``. Only the first line of each manifest is used; it is
    comma separated with the subject id in field 0, three upload locations
    in fields 1-3, the source name in field 4 and ra, dec, size in fields
    5-7. The new subject set is finally linked to workflow 11973.

    Args:
        id: field name; used as the directory suffix and the display name.
        use_database: when true, the status is recorded through
            ``update_status`` before and after the upload.
    """
    print('Create subject set and upload images for', id)
    if use_database:
        update_status(id, gz_status='Uploading')
    wd = os.getcwd()
    Panoptes.connect(username='******', password=os.environ['PANOPTES_PASSWORD'])
    # Locations in the manifests are relative to the field directory.
    os.chdir(target + id)
    try:
        project = Project.find(slug='chrismrp/radio-galaxy-zoo-lofar')

        subject_set = SubjectSet()
        subject_set.display_name = id
        subject_set.links.project = project
        subject_set.save()
        print('Made subject set')

        new_subjects = []
        g = glob.glob('*-manifest.txt')
        for i, f in enumerate(g):
            # Close each manifest promptly instead of leaking the handle.
            with open(f) as manifest_file:
                bits = manifest_file.readlines()[0].split(',')
            metadata = {
                'subject_id': int(bits[0]),
                'ra': float(bits[5]),
                'dec': float(bits[6]),
                '#size': float(bits[7]),
                'source_name': bits[4]
            }
            print('Upload doing', bits[4], '%i/%i' % (i, len(g)))
            subject = Subject()
            subject.links.project = project
            subject.metadata.update(metadata)
            for location in bits[1:4]:
                subject.add_location(location)
            subject.save()
            new_subjects.append(subject)

        subject_set.add(new_subjects)
    finally:
        # Restore the caller's working directory (it was saved above but
        # previously never used), including when the upload fails.
        os.chdir(wd)

    workflow = Workflow(11973)
    workflow.links.subject_sets.add(subject_set)
    if use_database:
        update_status(id, gz_status='In progress')
    print('Done!')
def main(argv=None):
    """Entry point for workflow1to2: log in, then cull the classification files."""
    parser = argparse.ArgumentParser(
        description="Add subject sets to workflow 2a,b,c from classifications")
    # Two required positionals followed by two boolean switches.
    positionals = (('username', 'zooniverse username'), ('password', 'password'))
    for arg_name, help_text in positionals:
        parser.add_argument(arg_name, type=str, help=help_text)
    for flags in (('-o', '--overwrite'), ('-a', '--add')):
        parser.add_argument(*flags, action='store_true')
    args = parser.parse_args(argv)

    # log in.
    Panoptes.connect(username=args.username, password=args.password)

    classifications_filename = 'astronomy-rewind-classifications.csv'
    wf1_filename = 'astronomy-rewind-classifications_wf1.csv'
    overwrite = args.overwrite
    cull_wf1(classifications_filename, wf1_filename, overwrite=overwrite)
    cull_subject_ids(wf1_filename, overwrite=overwrite, add=args.add)
def main(production=False):
    """Create a timestamped test subject set and upload up to 20 local subjects.

    Subject files are read from the ``subjects`` directory next to this file.
    Upload stops at the first index in ``0..19`` for which no
    ``image_<i>.png`` exists.

    Args:
        production: selects project 5590 when true, otherwise project 1820.
    """
    # NOTE(review): the next statement is not valid Python as it stands; the
    # username/password prompt and the Panoptes.connect(...) call appear to
    # have been redacted into a single run of asterisks. Restore the original
    # statements before running.
    uname = input('Enter your username: '******'https://panoptes-staging.zooniverse.org', admin=True)
    pId = 5590 if production else 1820
    project = Project.find(pId)
    subject_set = SubjectSet()
    subject_set.links.project = project
    # The epoch timestamp gives each run its own display name.
    subject_set.display_name = 'Test_subject_set_' + str(int(time.time()))
    subject_set.save()
    loc = os.path.abspath(os.path.dirname(__file__))
    subjects = os.listdir(loc + '/subjects')
    # TODO: change subject directory structure to be more efficient
    # (not having 12,000+ files in a folder...)
    for i in range(20):
        if 'image_{}.png'.format(i) in subjects:
            # An unreadable metadata file falls back to an empty dict.
            try:
                with open('{}/subjects/metadata_{}.json'.format(loc, i)) as f:
                    metadata = json.load(f)
            except IOError:
                metadata = {}
            # One subject per call; the helper's return value replaces
            # ``subject_set`` for the next iteration.
            subject_set = uploadSubjectToSet(
                project,
                subject_set,
                [[
                    j.format(loc, i)
                    for j in ('{}/subjects/image_{}.png',
                              '{}/subjects/difference_{}.json',
                              '{}/subjects/model_{}.json')
                ]],  # locations
                [metadata],
            )
        else:
            # Indices are expected to be contiguous: stop at the first gap.
            break
def cli(ctx, endpoint):
    # Prepare the CLI context and open the Panoptes connection.
    #
    # ctx      -- context object; ``config_dir``, ``config_file`` and
    #             ``config`` are set on it.
    # endpoint -- optional endpoint URL that overrides the configured one.
    #
    # expanduser() is used instead of os.environ['HOME'] so that an unset
    # HOME does not raise KeyError; the resulting path is the same whenever
    # HOME is set.
    ctx.config_dir = os.path.join(os.path.expanduser('~'), '.panoptes')
    ctx.config_file = os.path.join(ctx.config_dir, 'config.yml')
    ctx.config = {
        'endpoint': 'https://panoptes.zooniverse.org',
        'username': '',
        'password': '',
    }

    try:
        with open(ctx.config_file) as conf_f:
            # safe_load: the config only holds plain scalars, and a bare
            # yaml.load() without a Loader is unsafe and rejected by current
            # PyYAML. An empty file loads as None, hence the ``or {}``.
            ctx.config.update(yaml.safe_load(conf_f) or {})
    except IOError:
        # A missing or unreadable config file simply means "use defaults".
        pass

    if endpoint:
        ctx.config['endpoint'] = endpoint

    Panoptes.connect(
        endpoint=ctx.config['endpoint'],
        username=ctx.config['username'],
        password=ctx.config['password']
    )
def create_subjects_and_link_to_project(self, proto_subjects, project_id, workflow_id, subject_set_id):
    """Create subjects from ``proto_subjects`` and attach them to a workflow.

    Credentials come from the ``PANOPTES_USERNAME`` and ``PANOPTES_PASSWORD``
    environment variables. This is best effort: any exception is logged
    through ``self.log.exception`` and swallowed.

    Args:
        proto_subjects: iterable of mappings with the keys ``'location_lc'``,
            ``'location_ps'`` and ``'metadata'``.
        project_id: id of the project that owns the subjects.
        workflow_id: id of the workflow the subject set is added to.
        subject_set_id: id of an existing subject set, or ``None`` to create
            a new one named after the current UTC time.

    Returns:
        None.
    """
    try:
        USERNAME = os.getenv('PANOPTES_USERNAME')
        PASSWORD = os.getenv('PANOPTES_PASSWORD')
        Panoptes.connect(username=USERNAME, password=PASSWORD, endpoint=self.ENDPOINT)

        project = Project.find(project_id)
        # ``find`` is a classmethod: no throwaway instance is needed.
        workflow = Workflow.find(workflow_id)

        if subject_set_id is None:
            subject_set = SubjectSet()
            ts = time.gmtime()
            subject_set.display_name = time.strftime("%m-%d-%Y %H:%M:%S", ts)
            subject_set.links.project = project
            subject_set.save()
        else:
            subject_set = SubjectSet.find(subject_set_id)

        subjects = []
        for proto_subject in proto_subjects:
            subject = Subject()
            subject.links.project = project
            subject.add_location(proto_subject['location_lc'])
            subject.add_location(proto_subject['location_ps'])
            subject.metadata.update(proto_subject['metadata'])
            subject.save()
            subjects.append(subject)

        subject_set.add(subjects)
        workflow.add_subject_sets(subject_set)
    except Exception:
        # Deliberate catch-all boundary: failures are logged with traceback
        # and never propagate to the caller.
        self.log.exception("Error in create_subjects_and_link_to_project ")
def get_user_details(self, response):
    """Turn an OAuth token response into the user's details.

    A default Panoptes client is primed with the tokens from ``response``
    and used to fetch ``/me``. Admins get the single marker ``'admin user'``
    as their project list; everyone else gets the ids of the projects
    returned by ``Project.where()``.
    """
    with Panoptes() as client:
        client.bearer_token = response['access_token']
        client.logged_in = True
        client.refresh_token = response['refresh_token']
        token_lifetime = timedelta(seconds=response['expires_in'])
        client.bearer_expires = datetime.now() + token_lifetime

        user = client.get('/me')[0]['users'][0]

        if user['admin']:
            ids = ['admin user']
        else:
            ids = [project.id for project in Project.where()]

        details = {
            'username': user['login'],
            'email': user['email'],
            'is_superuser': user['admin'],
            'projects': ids,
        }
        return details
def apply(self, filenames):
    """Convert the given images to PNG and upload them as subjects.

    The target subject set is scoped to the bundle when the pipeline uses
    multiple subject sets, otherwise to the pipeline. When
    ``self.include_metadata`` is set and a ``manifest.csv`` exists in the
    ``<dir of first file>_interstitial_products`` directory, each subject
    gets the manifest row whose ``#filename`` column equals the file's base
    name.

    Args:
        filenames: non-empty sequence of image paths; every file is
            re-saved in place as PNG before it is uploaded.

    Raises:
        KeyError: if a manifest is in use and has no row for a file.
    """
    if self.pipeline.multiple_subject_sets:
        scope = self.bundle
    else:
        scope = self.pipeline

    theia_authenticated_client = Panoptes(
        endpoint=PanoptesUtils.base_url(),
        client_id=PanoptesUtils.client_id(),
        client_secret=PanoptesUtils.client_secret()
    )

    with theia_authenticated_client:
        target_set = self._get_subject_set(scope, self.project.id, scope.name_subject_set())
        using_manifest = False
        metadata_dictionary = {}
        path_example = filenames[0]
        manifest_file_location = path.join((path.dirname(path_example) + "_interstitial_products"), "manifest.csv")
        if self.include_metadata and path.exists(manifest_file_location):
            using_manifest = True
            with open(manifest_file_location, newline='') as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    metadata_dictionary[row['#filename']] = row

        for filename in filenames:
            # Re-save as PNG in place; the context manager releases the
            # image's file handle afterwards.
            with Image.open(filename) as img:
                img.save(filename, 'png')

            # basename() instead of splitting on "/" so the lookup key is
            # also right with the platform's native path separator.
            name_only = path.basename(filename)
            metadata = {}
            if using_manifest:
                metadata = metadata_dictionary[name_only]
            new_subject = self._create_subject(self.project.id, filename, metadata=metadata)
            target_set.add(new_subject)
def get_conn(self):
    """Return ``(client, project)``, connecting on first use.

    The connection settings are read from the connection named by
    ``self._conn_id``. Login, password and schema are mandatory; the project
    is looked up by the slug ``<login>/<schema>``. Later calls return the
    cached pair.
    """
    if self._panoptes_client is not None:
        return self._panoptes_client, self._project

    self.log.info(f"{self.__class__.__name__} version {__version__}")
    self.log.debug(
        f"getting connection information from {self._conn_id}")
    config = self.get_connection(self._conn_id)
    ctyp = config.conn_type or self.DEFAULT_CONN_TYPE
    host = config.host or self.DEFAULT_HOST
    port = config.port or self.DEFAULT_PORT
    slug = config.schema
    login = config.login
    password = config.password

    # The optional JSON "extra" may switch on auto-disabling of subject
    # sets; unparsable JSON switches it off.
    if config.extra:
        try:
            extra = json.loads(config.extra)
        except json.decoder.JSONDecodeError:
            self._auto_disable_subject_sets = False
        else:
            self._auto_disable_subject_sets = extra.get(
                "auto_disable_ssets", False)

    # Mandatory settings, checked in this order.
    required = (
        (login, MissingLoginError),
        (password, MissingPasswordError),
        (slug, MissingSchemaError),
    )
    for value, error_cls in required:
        if not value:
            raise error_cls(self._conn_id)

    project_slug = f"{login}/{slug}"
    endpoint = f"{ctyp}://{host}:{port}"
    self._panoptes_client = Panoptes.connect(username=login,
                                             password=password,
                                             endpoint=endpoint)
    self._project = Project.find(slug=project_slug)
    self.log.info(
        f"Searching project by slug {project_slug} found: {self._project}"
    )
    return self._panoptes_client, self._project
GZ: specobjid, dr8objid, dr7objid, ra, dec, t01_smooth_or_features_a02_features_or_disk_weighted_fraction, t02_edgeon_a05_no_weighted_fraction, t03_bar_a06_bar_weighted_fraction, t04_spiral_a08_spiral_weighted_fraction 3) Sky match the tables by RA and DEC in topcat, with the MaNGA data as table 1, and GZ as table 2. 4) Remove the second set of RA and DEC columns, and name the first ones 'RA' and 'DEC' 5) Save this file as a csv 6) This only works in python 2 because it uses panoptes_client ''' import numpy as np from panoptes_client import SubjectSet, Subject, Project, Panoptes import os import progressbar as pb myusername = os.environ['PANOPTES_USERNAME'] mypassword = os.environ['PANOPTES_PASSWORD'] Panoptes.connect(username= myusername, password=mypassword) project = Project.find(id='73') fullsample = SubjectSet.find(5326) spirals = SubjectSet.find(5324) bars = SubjectSet.find(5325) progress = pb.ProgressBar(widgets=[pb.Bar(), pb.ETA()]) data = np.genfromtxt('../GZ3D/MatchedData.csv', delimiter = ',', names=True, dtype=[('DEC', float), ('IAUNAME', '|S30'),('IFUTARGETSIZE',int), ('MANGAID', '|S10'),('MANGA_TILEID',int),('NSAID', int), ('PETROTH50',float),('RA',float),('SERSIC_TH50',float), ('Z',float),('specobjid', int),('dr8objid', int), ('dr7objid', int),('t01_smooth_or_features_a02_features_or_disk_weighted_fraction', float), ('t02_edgeon_a05_no_weighted_fraction', float), ('t03_bar_a06_bar_weighted_fraction', float),
import yaml import os import urllib SUBJECT_ID_FILE = 'subjects.txt' with open('config.yaml') as config_f: config = yaml.load(config_f, Loader=yaml.FullLoader) with open(SUBJECT_ID_FILE) as subject_id_f: subject_ids = [ s.strip() for s in subject_id_f.readlines() ] Panoptes.connect(**config) with ChargingBar( 'Updating', max=len(subject_ids), suffix='%(percent).1f%% %(eta_td)s' ) as bar: with Subject.async_saves(): for subject_id in subject_ids: bar.next() subject = Subject.find(subject_id) if '!CERiT' in subject.metadata: continue
import re import urllib2 import os import getpass import wikipedia # ask user for login and object they want to classify thing = raw_input("What would you like to classify? ") user = raw_input("Zooniverse username: "******"password: "******"lxml")
#!/usr/bin/env python3
"""
Un-flag arbitrary subjects as not processed, useful for debugging workflow
processing.
"""

import sys
# Make the parent directory importable so that ``lib`` resolves below.
sys.path.insert(0, "..")

from panoptes_client import Panoptes, Subject
from lib import settings

Panoptes.connect(
    username=settings.PANOPTES_USERNAME,
    password=settings.PANOPTES_PASSWORD,
)

# Earlier ID presets, kept for quick switching while debugging:
# SUBJECT_IDS = ['5823821', '5823822']
# SUBJECT_IDS = ['14813279', '14813280', '14813281']
# SUBJECT_IDS = ['15327062','15327056','15327068','15327065']

# Telegraph tests -
SUBJECT_IDS = [
    '15327068',
    '15327065',
    '15327062',
    '15327059',
    '15327056',
]

# Clear the "already processed" flag on each listed subject.
for current_id in SUBJECT_IDS:
    target = Subject.find(current_id)
    target.metadata[settings.METADATA_KEY_ALREADY_PROCESSED] = False
    target.save()
""" This version is written in Python 3.66 This script attempts to retrieve the exif data from existing subject image files and add the datetime to the subject metadata. It requires the project owner credentials to be set up as OS environmental variables, and an appropriate project slug modified on line 11. depending on the camera used to take the original subject image the exif code may be different than that in the code and may need to be modified""" import os from PIL import Image, ExifTags import panoptes_client from panoptes_client import SubjectSet, Project, Panoptes import requests Panoptes.connect(username=os.environ['User_name'], password=os.environ['Password']) project = Project.find(slug='pmason\fossiltrainer') while True: set_id = input('Entry subject set id to update:' + '\n') try: subject_set = SubjectSet.find(set_id) count_subjects = 0 subject_list = [] for subject in subject_set.subjects: count_subjects += 1 if subject.metadata['DateTime'] == '': try: img = Image.open(requests.get(subject.locations[0]['image/jpeg'], stream=True).raw) exif_dict = img._getexif() date_time = exif_dict[306] except (IOError, KeyError): print('Acquiring exif data for ', subject.id, ' failed') continue subject.metadata['DateTime'] = date_time
# DEFINE ZOONIVERSE SUBJECT METADATA, corresponds to METADATA FROM ITEM above, update both accordingly segment['metadata'] = { 'APA Citation': apa_citation, 'Date': item_date, 'Library Of Congress Item ID': loc_id, 'Source Collection': source_collection, 'Title': item_title } segments.append(segment) print('Item segments transformation complete.') return segments segments = transform_item_segments('https://www.loc.gov/item/' + LIBRARY_OF_CONGRESS_ITEM_ID) Panoptes.connect(username=USERNAME, password=PASSWORD, endpoint=ENDPOINT) project = Project.find(PROJECT) subject_set = SubjectSet() subject_set.links.project = project subject_set.display_name = segments[0]['metadata']['Title'] # uses item Title as default subject set name, or feel free to hardcode subject_set.save() print('Begin Zooniverse subject upload...') for segment in segments: subject = Subject() subject.links.project = project subject.add_location(segment['location'])
B-1180 Brussels BELGIUM phone : +32 (0)2 373.04.19 e-mail : [email protected] web : www.aeronomie.be ________________________________________________ """ filename = "panoptes_test2_export.csv" from panoptes_client import Project, Panoptes from panoptes_client.panoptes import PanoptesAPIException import requests, sys Panoptes.connect(username=username, password=password) project = Project.find(slug='zooniverse/radio-meteor-zoo') #r = project.get_classifications_export(generate=True, wait=True, wait_timeout=1800) wait_timeout = 60 project.generate_classifications_export() for attempt in range(60): print "wait classification export (attempt %d)" % attempt sys.stdout.flush() try: export = project.wait_classifications_export(wait_timeout) except PanoptesAPIException as e: print str(e)[:32] if str(e)[:32] == "classifications_export not ready": continue
def run():
    """
    Query for completed subjects, calculate kmeans vertex centroids, fetch subject images, split
    columns by centroids, row segmentation with Ocropy.

    As far as this function itself goes: for every entry of
    ``settings.COLUMNS_WORKFLOW_METADATA`` it loads the workflow's
    classifications, collects the vertex centroids and target subject set per
    subject and the ids of retired subjects, then enqueues one job per
    retired, not-yet-processed subject on the Redis queue.
    """
    logger = setup_logger(settings.APP_NAME, 'log/kmeans_and_enqueue_completed_subjects.log',
                          logging.DEBUG)
    subject_set_csv = SubjectSetCSV()
    workflow_router = SubjectSetWorkflowRouter(subject_set_csv, settings, logger)
    pages_raw_subject_ids = subject_set_csv.raw_pages_subject_ids()
    logger.debug("Running Wires and Rails Workflow Processor")
    Panoptes.connect(username=settings.PANOPTES_USERNAME, password=settings.PANOPTES_PASSWORD)

    # Accumulated across all configured workflows.
    retired_subject_ids = []
    vertices_and_target_subject_sets = []

    for _subject_set_id, metadata in settings.COLUMNS_WORKFLOW_METADATA.items(
    ):
        # NOTE(review): the "(" opened in this message is never closed.
        logger.debug("Loading vertices / subject retirement info for %(debug_name)s subject set " \
                     "(subject set id: %(subject_set_id)d; workflow id: %(workflow_id)d; task id: " \
                     " %(task_id)s", metadata)

        classification_kwargs = {
            'scope': 'project',
            'project_id': settings.PROJECT_ID,
            'workflow_id': metadata['workflow_id']
        }
        logger.debug("Loading classifications by params %s", str(classification_kwargs))
        # Materialise the query result so it can be passed around below.
        classifications_records = [
            c for c in Classification.where(**classification_kwargs)
        ]

        classifications = VertexClassifications(classifications_records, pages_raw_subject_ids)

        # Aggregate vertex centroids
        centroids_by_subject = classifications.vertex_centroids(
            metadata['task_id'])
        for subject_id, centroids in centroids_by_subject.items():
            # Find target subject set ID, or log and skip the subject
            try:
                target_subject_set_id = workflow_router \
                    .target_subject_set_id(subject_id, classifications_records)
            except UnidentifiedRawSubjectSetException as ex:
                logger.error(ex.args[0])
                continue
            except SharedMajorityException as ex:
                # TODO need add'l monitoring for this, e.g. manual report exception
                logger.error(ex.args[0])
                continue
            vertices_and_target_subject_sets.append(
                [subject_id, centroids, target_subject_set_id])

        # Aggregate retired subjects
        workflow = Workflow.find(metadata['workflow_id'])
        retirement_count = workflow.retirement['options']['count']
        retired_subject_ids += classifications.retired_subject_ids(
            metadata['task_id'], retirement_count)

    logger.debug(
        'Retrieved the following subject centroids for image segmentation: %s',
        str(vertices_and_target_subject_sets))

    logger.debug('For the following retired subject IDs: %s',
                 str(retired_subject_ids))

    queue = Queue(connection=Redis(host=settings.REDIS_HOST))

    for subject_id, centroids, target_subject_set_id in vertices_and_target_subject_sets:
        # Only retired subjects are queued.
        if subject_id not in retired_subject_ids:
            continue
        subject = Subject.find(subject_id)
        # Skip subjects whose metadata already carries a truthy
        # "already processed" flag.
        if settings.METADATA_KEY_ALREADY_PROCESSED in subject.metadata and \
           subject.metadata[settings.METADATA_KEY_ALREADY_PROCESSED]:
            logger.debug('Skipping subject id %d; already processed.', subject_id)
            continue
        logger.debug('Enqueuing subjects id: %d', subject_id)
        # timeout is 2 * 60 * 60 = 7200, i.e. two hours if the unit is
        # seconds - TODO confirm.
        queue.enqueue(QueueOperations.queue_new_subject_creation,
                      subject_id,
                      centroids,
                      target_subject_set_id,
                      timeout=2 * 60 * 60)
        # Flag right after enqueuing (see QueueOperations for what is set).
        QueueOperations.flag_subject_as_queued(subject)
size = file_bytes.tell() print('Uploading ', original_file, scale, resized_width, size) # ensure the file pointer is returned to the beginning of the file-like object file_bytes.seek(0, 0) return file_bytes parser = argparse.ArgumentParser(description='Zooniverse Uploader') parser.add_argument('image_dir') parser.add_argument('--subject', '-s', required=True) args = parser.parse_args() set_name = args.subject # connect to zooniverse - requires the User_name and Password to be set up as environmental variables in your OS Panoptes.connect(username=os.environ['ZOONIVERSE_USERNAME'], password=os.environ['ZOONIVERSE_PASSWORD']) # modify the project slug if used for other than Snapshots at Sea project = Project.find(slug='tedcheese/snapshots-at-sea') if not os.path.exists(args.image_dir): print('[%s] does not exist.' % args.image_dir) sys.exit() # load the list of image files found in the directory: # The local file name will be uploaded as metadata with the image file_types = ['jpg', 'jpeg'] subject_metadata = {} for entry in os.listdir(args.image_dir): if entry.partition('.')[2].lower() in file_types: subject_metadata[entry] = {'Filename': entry} print('Found ', len(subject_metadata), ' files to upload in this directory.')
description = 'Shallow clouds are the nemesis of climate modelers. Help us by detecting cloud organization from satellite images.' subject_name = 'EUREC4A-ICON-Scenes' # Read filenames to upload # import pdb; pdb.set_trace() # files = sorted(glob.glob(path1+'*.jpeg')) # files = files + sorted(glob.glob(path2+'*.jpeg')) # files = files + sorted(glob.glob(path3+'*C02*.jpeg')) files = sorted(glob.glob(path1 + '*[02468]??.jpeg')) # Create metadata subject_metadata = {} for f, file in enumerate(files): subject_metadata[file] = {'file': file, 'subject_reference': f} Panoptes.connect(username=username, password=password) # tutorial_project = Project() tutorial_project = Project.find(7699) # tutorial_project.display_name = display_name # tutorial_project.description = description # tutorial_project.primary_language = 'en' # tutorial_project.private =True # tutorial_project.save() subject_set = SubjectSet() subject_set.links.project = tutorial_project subject_set.display_name = subject_name subject_set.save() tutorial_project.reload() print(tutorial_project.links.subject_sets)