with open('config.yaml') as config_f: config = yaml.load(config_f, Loader=yaml.FullLoader) with open(SUBJECT_ID_FILE) as subject_id_f: subject_ids = [ s.strip() for s in subject_id_f.readlines() ] Panoptes.connect(**config) with ChargingBar( 'Updating', max=len(subject_ids), suffix='%(percent).1f%% %(eta_td)s' ) as bar: with Subject.async_saves(): for subject_id in subject_ids: bar.next() subject = Subject.find(subject_id) if '!CERiT' in subject.metadata: continue superwasp_id = subject.metadata.get('Filename', subject.metadata.get('filename')).split('_')[0] coords = superwasp_id.replace('1SWASP', '') coords_quoted = urllib.parse.quote(coords) ra = urllib.parse.quote('{}:{}:{}'.format( coords[1:3], coords[3:5], coords[5:10]
def upload_manifest_to_galaxy_zoo(
        subject_set_name,
        manifest,
        project_id='5733',  # default to main GZ project
        login_loc='zooniverse_login.txt'):
    """
    Upload a manifest (galaxies with prepared metadata) to a Panoptes subject set.

    If a subject set called ``subject_set_name`` already exists in the project
    it is reused; otherwise a new one is created. Subjects are saved and linked
    in blocks of 100 so that the async join happens per block rather than once
    at the very end.

    Args:
        subject_set_name (str): display name of the subject set. If it contains
            'TEST', nothing is uploaded and the manifest is returned untouched.
        manifest (list): containing dicts of form
            {png_loc: img.png, key_data: {metadata_col: metadata_value}}
        project_id (str): panoptes project id e.g. '5733' for Galaxy Zoo,
            '6490' for mobile, '8751' for staging
        login_loc (str): path to a text file holding the Zooniverse login
            details; its parsed contents are passed as keyword arguments to
            ``Panoptes.connect``.

    Returns:
        list: the ``manifest`` argument, unchanged (for debugging only)

    Raises:
        AssertionError: if ``login_loc`` does not exist.
    """
    assert os.path.exists(login_loc), 'Login file not found: {}'.format(login_loc)

    if 'TEST' in subject_set_name:
        logging.warning('Testing mode detected - not uploading!')
        return manifest

    known_project_messages = {
        '5733': 'Uploading to Galaxy Zoo project 5733',
        '6490': 'Uploading to mobile app project 6490',
        '8751': 'Uploading to staging project 8751',
    }
    logging.info(known_project_messages.get(
        project_id,
        'Uploading to unknown project {}'.format(project_id)
    ))

    # Important - don't commit the password!
    zooniverse_login = read_data_from_txt(login_loc)
    Panoptes.connect(**zooniverse_login)

    project = Project.find(project_id)

    # Reuse the subject set if one with this display name already exists.
    # No early break: if several sets share the name, the last one listed wins,
    # which is what this function has always done.
    subject_set = None
    for candidate_subject_set in SubjectSet.where(project_id=project_id):
        if candidate_subject_set.raw['display_name'] == subject_set_name:
            subject_set = candidate_subject_set

    if not subject_set:
        # Otherwise make a new one.
        subject_set = SubjectSet()
        subject_set.links.project = project
        subject_set.display_name = subject_set_name
        subject_set.save()

    manifest_block_size = 100

    # The context manager guarantees the progress bar is closed, even on error.
    with tqdm(total=len(manifest), unit=' subjects uploaded') as pbar:
        # save_subject is defined elsewhere in the project; it is assumed to
        # return the Subject it saved and to advance pbar - TODO confirm.
        save_subject_partial = functools.partial(
            save_subject, project=project, pbar=pbar)

        # Upload in async blocks, to avoid a huge join at the end. Stepping with
        # range() stops exactly at the end of the manifest, so no empty trailing
        # block is sent when len(manifest) is a multiple of the block size
        # (or zero).
        for manifest_block_start in range(0, len(manifest), manifest_block_size):
            manifest_block = manifest[
                manifest_block_start:manifest_block_start + manifest_block_size]

            # Leaving the async_saves context waits for the queued saves, so
            # the subjects are saved before they are linked below.
            with Subject.async_saves():
                new_subjects = [
                    save_subject_partial(manifest_entry)
                    for manifest_entry in manifest_block
                ]

            subject_set.add(new_subjects)
            logging.info('{} subjects linked'.format(len(new_subjects)))

    return manifest  # for debugging only