def add_new_subject(self, image_list, metadata_list, subject_set_name):
    """
    Create a new subject set and add one subject per image, with metadata.

    :param image_list: list of image locations to upload
    :param metadata_list: list of metadata dicts, one per image
    :param subject_set_name: display name for the new subject set
    :return: None
    :raises ValueError: if image_list and metadata_list differ in length
    """
    # Fail fast on mismatched inputs instead of only printing a warning and
    # carrying on (the original would then hit an IndexError mid-upload or
    # silently drop trailing metadata).
    if len(image_list) != len(metadata_list):
        raise ValueError("Image list and metadata list do not match")

    # Create the subject set and link it to this project.
    subject_set = SubjectSet()
    subject_set.links.project = self.project
    subject_set.display_name = subject_set_name
    subject_set.save()

    # Build and save one subject per (image, metadata) pair.
    new_subjects = []
    for image, metadata in zip(image_list, metadata_list):
        subject = Subject()
        subject.links.project = self.project
        subject.add_location(image)
        subject.metadata.update(metadata)
        subject.save()
        new_subjects.append(subject)
    subject_set.add(new_subjects)
def push_new_row_subjects(self, source_subject, target_subject_set_id, row_paths_by_column):
    """
    Build unclassified row subjects from the column-indexed image paths in
    row_paths_by_column and push them to the target subject set, tagging each
    with metadata that points back at the source subject and its column.
    """
    linked_project = Project.find(settings.PROJECT_ID)
    target_set = SubjectSet.find(target_subject_set_id)
    created = []
    for col_index, paths in row_paths_by_column.items():
        self._logger.info(
            'Creating %d new row subjects for column index %d for subject %s',
            len(paths), col_index, source_subject.id)
        for path in paths:
            row_subject = Subject()
            row_subject.links.project = linked_project
            # Carry identifying fields over from the source document subject.
            for field in ('book', 'page'):
                row_subject.metadata[field] = source_subject.metadata[field]
            row_subject.metadata['source_document_subject_id'] = source_subject.id
            row_subject.metadata['source_document_column_index'] = col_index
            row_subject.add_location(path)
            row_subject.save()
            created.append(row_subject)
    target_set.add(created)
def pushSubject(subjectSet, project, imageLocations, metadata, livePost):
    """
    Create and save a subject carrying the given images and metadata,
    retrying the save on connection errors, then add it to subjectSet.

    Returns the saved Subject, or None when livePost is false (dry run).
    """
    if not livePost:
        return None
    subject = Subject()
    subject.links.project = project
    for location in imageLocations:
        subject.add_location(location)
    subject.metadata.update(metadata)
    # Retry indefinitely on transient connection failures.
    while True:
        try:
            subject.save()
            break
        except ConnectionError as e:
            print('{} , TRYING AGAIN'.format(e))
    subjectSet.add(subject)
    return subject
def save_subject(manifest_item, project, pbar=None):
    """
    Add manifest item to project.
    Note: follow with subject_set.add(subject) to associate with subject set.

    Args:
        manifest_item (dict): of form {png_loc: img.png, key_data: some_data_dict}
        project (str): project to upload subject too e.g. '5773' for Galaxy Zoo
        pbar (tqdm.tqdm): progress bar to update. If None, no bar will display.

    Returns:
        Subject: the saved subject

    Raises:
        FileNotFoundError: if the png_loc file does not exist
    """
    subject = Subject()
    subject.links.project = project
    png_loc = manifest_item['png_loc']
    # Raise an explicit error instead of `assert`, which is stripped when
    # Python runs with optimisation (-O) enabled.
    if not os.path.exists(png_loc):
        raise FileNotFoundError('Missing subject location: {}'.format(png_loc))
    subject.add_location(png_loc)
    subject.metadata.update(manifest_item['key_data'])
    subject.save()
    if pbar:
        pbar.update()
    return subject
def make_tutorial_images(imagePaths, ellipseData, projectData):
    """
    Upload tutorial image subjects to the subject set named in projectData.

    For each image path, builds a subject whose metadata comes from the
    corresponding ellipse-data group, then links all new subjects to the
    subject set in one call.
    """
    # Connect to Panoptes
    Panoptes.connect(
        username=projectData["user_name"],
        password=projectData["password"]
    )
    # Look the subject set up once, not once per image as before -- a failed
    # lookup aborts the whole upload either way, and this avoids a redundant
    # network round-trip per image.
    try:
        subjectSet = SubjectSet.find(projectData["subject_set"])
    except PanoptesAPIException as e:
        print(e)
        return
    newSubjects = []
    for imageId, imagePath in enumerate(imagePaths):
        print(f"Adding {imagePath}...")
        newSubject = Subject()
        newSubject.add_location(imagePath)
        newSubject.links.project = subjectSet.links.project
        newSubject.metadata.update(
            make_metadata(
                ellipseData.get_group(imageId).reset_index(drop=True),
                imagePath
            )
        )
        newSubject.save()
        newSubjects.append(newSubject)
    subjectSet.add(newSubjects)
def create_subject(project, metadata, media_files):
    """Build, save and return a Subject linked to `project`, carrying the
    given media files and metadata."""
    new_subject = Subject()
    new_subject.links.project = project
    for path in media_files:
        new_subject.add_location(path)
    new_subject.metadata.update(metadata)
    new_subject.save()
    return new_subject
def _create_subject(self, project_id, filename, metadata=None):
    """Create and save a subject for `filename` under the given project,
    optionally attaching metadata. Returns the saved Subject."""
    new_subject = Subject()
    new_subject.links.project = Project.find(project_id)
    new_subject.add_location(filename)
    if metadata:
        new_subject.metadata.update(metadata)
    new_subject.save()
    return new_subject
def create_subject(project, media_files, metadata):
    """
    Create a subject

    Args:
        - project: a Project() object defining the Zooniverse project
        - media_files: a list of media files to link to the subject
        - metadata: a dictionary with metadata to attach
    """
    new_subject = Subject()
    new_subject.links.project = project
    for media_path in media_files:
        new_subject.add_location(media_path)
    new_subject.metadata.update(metadata)
    new_subject.save()
    return new_subject
def upload_subject(locations: List, project: Project, subject_set_name: str, metadata: Dict):
    """
    Create a subject from the given file locations and metadata, save it,
    and add it to the named subject set (created if it does not exist).

    :param locations: list of local file paths to attach to the subject
    :param project: the Project the subject belongs to
    :param subject_set_name: display name of the target subject set
    :param metadata: metadata dict to attach to the subject
    :return: the new subject's id
    :raises FileNotFoundError: if any location is not an existing file
    """
    subject = Subject()
    subject.links.project = project
    # Validate each file before attaching it.
    for location in locations:
        if not os.path.isfile(location):
            raise FileNotFoundError(
                'Missing subject location: {}'.format(location))
        subject.add_location(location)
    subject.metadata.update(metadata)
    # (Removed a redundant `subject_set_name = subject_set_name`
    # self-assignment that had no effect.)
    subject_set = get_or_create_subject_set(project.id, subject_set_name)
    subject.save()
    subject_set.add(subject)
    return subject.id
def upload_images(id, use_database=True):
    """
    Create a Zooniverse subject set named after `id` and upload one subject
    per *-manifest.txt file found under `target + id`, then link the set to
    workflow 11973. Optionally records progress via update_status.

    Each manifest's first line is a CSV row; columns used here are:
    0 subject_id, 1-3 image locations, 4 source_name, 5 ra, 6 dec, 7 size.

    NOTE(review): `id` shadows the builtin, and `wd` is saved but never
    restored -- the process working directory stays changed after this call.
    """
    print('Create subject set and upload images for', id)
    if use_database:
        update_status(id, gz_status='Uploading')
    wd = os.getcwd()
    # Username redacted in this copy; password comes from the environment.
    Panoptes.connect(username='******', password=os.environ['PANOPTES_PASSWORD'])
    os.chdir(target + id)
    project = Project.find(slug='chrismrp/radio-galaxy-zoo-lofar')
    subject_set = SubjectSet()
    subject_set.display_name = id
    subject_set.links.project = project
    subject_set.save()
    print('Made subject set')
    new_subjects = []
    g = glob.glob('*-manifest.txt')
    for i, f in enumerate(g):
        # Only the first line of each manifest is used.
        bits = open(f).readlines()[0].split(',')
        metadata = {
            'subject_id': int(bits[0]),
            'ra': float(bits[5]),
            'dec': float(bits[6]),
            # leading '#' hides the field from volunteers
            '#size': float(bits[7]),
            'source_name': bits[4]
        }
        print('Upload doing', bits[4], '%i/%i' % (i, len(g)))
        subject = Subject()
        subject.links.project = project
        subject.metadata.update(metadata)
        # Columns 1-3 hold the image locations for this subject.
        for location in bits[1:4]:
            subject.add_location(location)
        subject.save()
        new_subjects.append(subject)
    subject_set.add(new_subjects)
    workflow = Workflow(11973)
    workflow.links.subject_sets.add(subject_set)
    if use_database:
        update_status(id, gz_status='In progress')
    print('Done!')
def _create_subjects_from_epicollect5(self, project, subjects_metadata):
    """Turn Epicollect5 observation records into saved Zooniverse subjects.

    Each record contributes its identifying fields, location coordinates and
    image URL; returns the list of saved subjects.
    """
    created = []
    for record in subjects_metadata:
        subject = Subject()
        # Flat fields copied one-to-one from the record, in original order.
        for key in ('id', 'project', 'obs_type', 'source', 'url',
                    'created_at', 'observer'):
            subject.metadata[key] = record[key]
        subject.metadata['longitude'] = record['location']['longitude']
        subject.metadata['latitude'] = record['location']['latitude']
        subject.metadata['comment'] = record['comment']
        subject.metadata['spectrum_type'] = record.get('spectrum_type', "?")
        subject.add_location({'image/jpg': record['url']})
        subject.links.project = project
        subject.save()
        created.append(subject)
    return created
def create_subjects_and_link_to_project(self, proto_subjects, project_id, workflow_id, subject_set_id):
    """
    Create subjects from proto_subjects, add them to a subject set, and link
    that set to the given workflow.

    If subject_set_id is None a new subject set named with the current UTC
    timestamp is created; otherwise the existing set is used. Each proto
    subject supplies two media locations ('location_lc', 'location_ps') and
    a 'metadata' dict. Errors are logged, not raised.
    """
    try:
        USERNAME = os.getenv('PANOPTES_USERNAME')
        PASSWORD = os.getenv('PANOPTES_PASSWORD')
        Panoptes.connect(username=USERNAME, password=PASSWORD,
                         endpoint=self.ENDPOINT)
        project = Project.find(project_id)
        # .find() is a classmethod -- call it on the class rather than on a
        # throwaway instance as before.
        workflow = Workflow.find(workflow_id)
        if subject_set_id is None:  # `is None`, not `== None`
            # No target set given: create one named after the current time.
            subject_set = SubjectSet()
            ts = time.gmtime()
            subject_set.display_name = time.strftime(
                "%m-%d-%Y %H:%M:%S", ts)
            subject_set.links.project = project
            subject_set.save()
        else:
            subject_set = SubjectSet.find(subject_set_id)
        subjects = []
        for proto_subject in proto_subjects:
            subject = Subject()
            subject.links.project = project
            subject.add_location(proto_subject['location_lc'])
            subject.add_location(proto_subject['location_ps'])
            subject.metadata.update(proto_subject['metadata'])
            subject.save()
            subjects.append(subject)
        subject_set.add(subjects)
        workflow.add_subject_sets(subject_set)
    except Exception:
        self.log.exception("Error in create_subjects_and_link_to_project ")
def main():
    """
    Read image filenames from a file, bin them into subject sets of at most
    n images each, upload the subjects to Zooniverse, record each send in
    MongoDB, and attach the new subject sets to the project's first workflow.
    """
    ap = argparse.ArgumentParser(
        description=
        'Given a list of images, bins them into subject sets of size n')
    # require file path to read in images
    ap.add_argument('-f',
                    '--filename',
                    required=True,
                    dest='filename',
                    type=str,
                    help='The name of the file from which to read the images')
    # optionally require subject set size; defaults to 1000
    ap.add_argument(
        '-n',
        '--size',
        required=False,
        dest='n',
        type=int,
        default=1000,
        help='The maximum number of images a subject set should contain. \
The value should be between 1 and 10000, inclusive')
    # parse args into variables and check values
    args = vars(ap.parse_args())
    filename = args['filename'] if args['filename'] else None
    n = args['n'] if args['n'] else None
    if not (n >= 1 and n <= 10000):
        raise ValueError('n must be between 1 and 10000, inclusive')
    # connect to zooniverse
    Panoptes.connect(username=zooniverse_config.Zooniverse_USERNAME,
                     password=zooniverse_config.Zooniverse_PASS)
    project = Project.find(zooniverse_config.Project_ID)
    # connection to mongodb
    mongoConn = MongoClient(csh_db_config.DB_HOST + ":" +
                            str(csh_db_config.DB_PORT))
    cshTransDB = mongoConn[csh_db_config.TRANSCRIPTION_DB_NAME]
    cshTransDB.authenticate(csh_db_config.TRANSCRIPTION_DB_USER,
                            csh_db_config.TRANSCRIPTION_DB_PASS)
    cshCollection = cshTransDB[csh_db_config.TRANS_DB_MeetingMinColl]
    # track subject sets being created
    subjectSets = []
    # get the image filenames in a Python list
    with open(filename) as handle:
        filenames = handle.readlines()
    # divide files into groups of n: zip_longest pads the final group with
    # None, and the inner comprehension strips that padding back out
    filegroups = list([e for e in t if e != None]
                      for t in itertools.zip_longest(*([iter(filenames)] * n)))
    for group in filegroups:
        displayName = '{:%Y-%b-%d %H:%M:%S}'.format(datetime.datetime.now())
        # create a new subject set
        subjectSet = SubjectSet()
        subjectSet.links.project = project
        subjectSet.display_name = displayName
        subjectSet.save()
        subjectSetId = subjectSet.id
        subjectSets.append(subjectSetId)
        # create a new subject for each file and add to the subject set
        for filename in group:
            # remove trailing '\n' character
            filename = filename.rstrip()
            # create a new subject
            subject = Subject()
            subject.links.project = project
            # location of the anonymised image comes from MongoDB
            filepath = cshCollection.find_one({'_id': filename})['file']['anonPath']
            subject.add_location(filepath)
            subject.metadata['ID'] = filename
            subject.save()
            # add to subject set
            subjectSet.add(subject)
            # retrieve and update the record from mongodb
            updateQuery = {
                '$set': {
                    'canCrowdsource': True,
                    'transcription': {
                        'numClassifications': 5,
                        'subjectSetId': subjectSetId,
                        'status': 'sent'
                    }
                }
            }
            record = cshCollection.find_one_and_update({'_id': filename},
                                                       updateQuery)
    # add subject sets to the workflow
    workflow = project.links.workflows[0]
    workflow.add_subject_sets(subjectSets)
    # print helpful information to the console
    print('{} subject sets created with the following IDs: {}'.format(
        len(subjectSets), subjectSets))
# Counters and progress tracking for the upload loop.
new_subjects = 0
old_subjects = 0
failed_subjects = 0
working_on = []
# loop over the preloaded manifest file
for metadata in manifest_list:
    working_on = [metadata['subject'], metadata['image1']]
    # test for previously uploaded
    if metadata['image1'] not in previous_subjects:
        try:
            subject = Subject()
            subject.links.project = project
            # find the files in the metadata listing and add their locations
            for file in list(metadata.values())[1:]:
                if file.find('.jpg') > 0:
                    subject.add_location(directory + os.sep + file)
            # update subject metadata
            subject.metadata.update(metadata)
            # nothing happens until the two lines below; comment them out for testing
            subject.save()
            subject_set.add(subject.id)
            new_subjects += 1
            build_part = '{} successfully uploaded at {}'.format(working_on, str(datetime.now())[0:19]) + '\n'
        except panoptes_client.panoptes.PanoptesAPIException:
            failed_subjects += 1
            build_part = 'An error occurred during the upload of {}'.format(working_on) + '\n'
    else:
        old_subjects += 1
        build_part = '{} previously uploaded'.format(working_on) + '\n'
    print(build_part, end='')
# NOTE(review): the body of this `if save:` continues beyond this excerpt.
if save:
# Build per-file metadata: each file keyed to its name and ordinal position.
subject_metadata = {}
for f, file in enumerate(files):
    subject_metadata[file] = {'file': file, 'subject_reference': f}
Panoptes.connect(username=username, password=password)
# Project-creation code below is retained commented-out from the original
# tutorial; an existing project is looked up by id instead.
# tutorial_project = Project()
tutorial_project = Project.find(7699)
# tutorial_project.display_name = display_name
# tutorial_project.description = description
# tutorial_project.primary_language = 'en'
# tutorial_project.private =True
# tutorial_project.save()
# Create a new subject set attached to the project.
subject_set = SubjectSet()
subject_set.links.project = tutorial_project
subject_set.display_name = subject_name
subject_set.save()
tutorial_project.reload()
print(tutorial_project.links.subject_sets)
# Create and save one subject per file, then link them to the set in bulk.
new_subjects = []
for filename, metadata in tqdm.tqdm(subject_metadata.items()):
    subject = Subject()
    subject.links.project = tutorial_project
    subject.add_location(filename)
    subject.metadata.update(metadata)
    subject.save()
    new_subjects.append(subject)
subject_set.add(new_subjects)
quit() # create a new subject set for the new data and link it to the project above subject_set = SubjectSet() subject_set.links.project = project subject_set.display_name = set_name subject_set.save() print('Uploading subjects, this could take a while!') new_subjects = 0 old_subjects = 0 for filename, metadata in subject_metadata.items(): try: if filename not in previous_subjects: subject = Subject() subject.links.project = project subject.add_location(location + os.sep + filename) subject.metadata.update(metadata) subject.save() subject_set.add(subject.id) print(filename) new_subjects += 1 else: old_subjects += 1 except panoptes_client.panoptes.PanoptesAPIException: print('An error occurred during the upload of ', filename) print(new_subjects, 'new subjects created and uploaded', old_subjects, 'already uploaded') uploaded = 0 with open(location + os.sep + 'Uploaded subjects.csv', 'wt') as file: subject_set = SubjectSet.where(project_id=project.id,
except StopIteration:
    # Create a new subject set for the new data and link it to the project.
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = image_set_name
    subject_set.save()
# Add the samples to the subject set.
with open(manifest_images_file, 'r') as mani_file:
    print('Uploading image_set')
    r = csv.DictReader(mani_file)
    for line in r:
        # One subject per manifest row, with two media locations --
        # presumably light curve ('lc') and spectrum ('sp'); TODO confirm.
        subject = Subject()
        subject.links.project = project
        subject.add_location(line['lc'])
        subject.add_location(line['sp'])
        subject.metadata['subject_id'] = line['id']
        subject.save()
        subject_set.add(subject.id)

# ------- Audio subject set -------
# Connect to the matching subject set, or create a new one if it does not
# exist yet.
try:
    # Check whether the subject set already exists.
    subject_set = SubjectSet.where(project_id=project.id,
                                   display_name=audio_set_name).next()
except StopIteration:
    # Create a new subject set for the new data and link it to the project.
    subject_set = SubjectSet()
images = [a['src'] for a in soup.find_all("img", {"src": re.compile("gstatic.com")})] #print images for img in images: raw_img = urllib2.urlopen(img).read() #add the directory for your image here DIR="images/" cntr = len([i for i in os.listdir(DIR) if image_type in i]) + 1 f = open(DIR + image_type + "_"+ str(cntr)+".jpg", 'wb') f.write(raw_img) f.close() print 'Creating image set...' # create the subject set. subject_set = SubjectSet() subject_set.links.project = p subject_set.display_name = "Images of " + thing + '\'s' subject_set.save() print 'Uploading images to Zooniverse...' # add all images to subject set for i in range(1,21): subject = Subject() subject.links.project = p subject.add_location('images/' + str(thing) + '_' + str(i)+'.jpg') subject.save() subject_set.add(subject) print 'Complete.'
# get data-time from original video file try: video_data = FFProbe(location + os.sep + original_file) datetime = video_data.metadata['creation_time'] except (IOError, KeyError, TypeError): print('Acquiring exif data for ', original_file, ' failed') datetime = '' # finally we are ready for the actual upload of the modified file: try: subject = Subject() subject.links.project = project compress(location + os.sep + original_file) print('Compressed ', original_file, 'to', os.path.getsize('temp.mp4'), 'bytes, uploading....') subject.add_location('temp.mp4') videos_uploaded += 1 # update the subject metadata (add '#' to the beginning of the field name to hide that field) subject.metadata['Site_Date'] = set_name subject.metadata['Filename'] = original_file subject.metadata['Date_time'] = datetime # nothing is actually uploaded to panoptes until the save is executed. # for testing without actually uploading anything comment out the following two lines subject.save() subject_set.add(subject.id) except panoptes_client.panoptes.PanoptesAPIException: print('An error occurred during the upload of ', original_file) print(videos_uploaded, 'videos uploaded') # cleanup the temporary file at the end if os.path.isfile('temp.mp4'): os.remove('temp.mp4')
    # (tail of a try block begun above this excerpt)
    previous_subjects.append(subject.metadata['Filename'])
except StopIteration:
    # create a new subject set for the new data and link it to the project above
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = set_name
    subject_set.save()
print('Uploading subjects, this could take a while!')
new_subjects = 0
for filename, metadata in subject_metadata.items():
    try:
        # Skip files already uploaded in a previous run.
        if filename not in previous_subjects:
            subject = Subject()
            subject.links.project = project
            # Images are resized (max 960px) before upload.
            subject.add_location(compress(args.image_dir, filename, 960))
            subject.metadata.update(metadata)
            subject.save()
            subject_set.add(subject.id)
            new_subjects += 1
    except panoptes_client.panoptes.PanoptesAPIException:
        print('An error occurred during the upload of ', filename)
print(new_subjects, 'new subjects created and uploaded')
print('Uploading complete, Please wait while the full subject listing is prepared and saved in')
output_file = "uploaded_subjects.csv"
print('"%s" in the drive with the original images' % output_file)
uploaded = 0
# NOTE(review): the body of this `with` continues beyond the excerpt.
with open(os.path.join(args.image_dir, output_file), 'wt') as file_up:
def upload_subjects(
    subject_set_id,
    manifest_files,
    allow_missing,
    remote_location,
    mime_type,
    file_column,
):
    """
    Uploads subjects from each of the given MANIFEST_FILES.

    Example with only local files:

    $ panoptes subject-set upload-subjects 4667 manifest.csv

    Local filenames will be automatically detected in the manifest and
    uploaded, or filename columns can be specified with --file-column.

    If you are hosting your media yourself, you can put the URLs in the
    manifest and specify the column number(s):

    $ panoptes subject-set upload-subjects -r 1 4667 manifest.csv

    $ panoptes subject-set upload-subjects -r 1 -r 2 4667 manifest.csv

    Any local files will still be detected and uploaded.
    """
    # A .yaml manifest is the saved state of an interrupted upload; it can
    # only be resumed one at a time.
    if (
        len(manifest_files) > 1
        and any(map(lambda m: m.endswith('.yaml'), manifest_files))
    ):
        click.echo(
            'Error: YAML manifests must be processed one at a time.',
            err=True,
        )
        return -1
    elif manifest_files[0].endswith('.yaml'):
        with open(manifest_files[0], 'r') as yaml_manifest:
            upload_state = yaml.load(yaml_manifest, Loader=yaml.FullLoader)
        if upload_state['state_version'] > CURRENT_STATE_VERSION:
            click.echo(
                'Error: {} was generated by a newer version of the Panoptes '
                'CLI and is not compatible with this version.'.format(
                    manifest_files[0],
                ),
                err=True,
            )
            return -1
        if upload_state['subject_set_id'] != subject_set_id:
            click.echo(
                'Warning: You specified subject set {} but this YAML '
                'manifest is for subject set {}.'.format(
                    subject_set_id,
                    upload_state['subject_set_id'],
                ),
                err=True,
            )
            click.confirm(
                'Upload {} to subject set {} ({})?'.format(
                    manifest_files[0],
                    subject_set_id,
                    SubjectSet.find(subject_set_id).display_name,
                ),
                abort=True
            )
            upload_state['subject_set_id'] = subject_set_id
        resumed_upload = True
    else:
        # Fresh upload: record everything needed to resume it later.
        upload_state = {
            'state_version': CURRENT_STATE_VERSION,
            'subject_set_id': subject_set_id,
            'manifest_files': manifest_files,
            'allow_missing': allow_missing,
            'remote_location': remote_location,
            'mime_type': mime_type,
            'file_column': file_column,
            'waiting_to_upload': [],
            'waiting_to_link': {},
        }
        resumed_upload = False

    # A single MIME type may be given for multiple remote locations, in
    # which case it applies to all of them; otherwise the counts must match.
    remote_location_count = len(upload_state['remote_location'])
    mime_type_count = len(upload_state['mime_type'])
    if remote_location_count > 1 and mime_type_count == 1:
        upload_state['mime_type'] = (
            upload_state['mime_type'] * remote_location_count
        )
    elif remote_location_count > 0 and mime_type_count != remote_location_count:
        click.echo(
            'Error: The number of MIME types given must be either 1 or equal '
            'to the number of remote locations.',
            err=True,
        )
        return -1

    def validate_file(file_path):
        # A local file must exist, be non-empty, and fit the upload cap.
        if not os.path.isfile(file_path):
            click.echo(
                'Error: File "{}" could not be found.'.format(
                    file_path,
                ),
                err=True,
            )
            return False

        file_size = os.path.getsize(file_path)
        if file_size == 0:
            click.echo(
                'Error: File "{}" is empty.'.format(
                    file_path,
                ),
                err=True,
            )
            return False
        elif file_size > MAX_UPLOAD_FILE_SIZE:
            click.echo(
                'Error: File "{}" is {}, larger than the maximum {}.'.format(
                    file_path,
                    humanize.naturalsize(file_size),
                    humanize.naturalsize(MAX_UPLOAD_FILE_SIZE),
                ),
                err=True,
            )
            return False
        return True

    subject_set = SubjectSet.find(upload_state['subject_set_id'])

    if not resumed_upload:
        subject_rows = []
        for manifest_file in upload_state['manifest_files']:
            # 'U' (universal newline) mode was removed in Python 3.11;
            # newline='' is the csv module's documented way to open files.
            with open(manifest_file, 'r', newline='') as manifest_f:
                file_root = os.path.dirname(manifest_file)
                r = csv.reader(manifest_f, skipinitialspace=True)
                headers = next(r)
                for row in r:
                    metadata = dict(zip(headers, row))
                    files = []
                    if not upload_state['file_column']:
                        # No explicit file columns: detect them from this
                        # row by checking which cells name existing files.
                        upload_state['file_column'] = []
                        for field_number, col in enumerate(row, start=1):
                            file_path = os.path.join(file_root, col)
                            if os.path.exists(file_path):
                                upload_state['file_column'].append(
                                    field_number,
                                )
                                if not validate_file(file_path):
                                    return -1
                                files.append(file_path)
                    else:
                        for field_number in upload_state['file_column']:
                            file_path = os.path.join(
                                file_root,
                                row[field_number - 1]
                            )
                            if not validate_file(file_path):
                                return -1
                            files.append(file_path)

                    # Remotely hosted media are stored as {mime_type: url}.
                    for field_number, _mime_type in zip(
                        upload_state['remote_location'],
                        upload_state['mime_type'],
                    ):
                        files.append({_mime_type: row[field_number - 1]})

                    if len(files) == 0:
                        click.echo(
                            'Could not find any files in row:',
                            err=True,
                        )
                        click.echo(','.join(row), err=True)
                        if not upload_state['allow_missing']:
                            return -1
                        else:
                            continue
                    subject_rows.append((files, metadata))

            if not subject_rows:
                click.echo(
                    'File {} did not contain any rows.'.format(
                        manifest_file,
                    ),
                    err=True,
                )
                return -1

        subject_rows = list(enumerate(subject_rows))
        upload_state['waiting_to_upload'] = copy.deepcopy(subject_rows)
    else:
        # Re-queue previously created subjects that no longer exist on the
        # server. Iterate over a snapshot: deleting from a dict while
        # iterating its items() view raises RuntimeError.
        for subject_id, subject_row in list(
            upload_state['waiting_to_link'].items()
        ):
            try:
                subject = Subject.find(subject_id)
            except PanoptesAPIException:
                upload_state['waiting_to_upload'].append(subject_row)
                del upload_state['waiting_to_link'][subject_id]
        subject_rows = copy.deepcopy(upload_state['waiting_to_upload'])

    pending_subjects = []

    def move_created(limit):
        # Drain finished async saves until at most `limit` remain pending.
        while len(pending_subjects) > limit:
            # Iterate over a snapshot: removing from a list while iterating
            # it skips elements.
            for subject, subject_row in list(pending_subjects):
                if subject.async_save_result:
                    pending_subjects.remove((subject, subject_row))
                    upload_state['waiting_to_upload'].remove(subject_row)
                    upload_state['waiting_to_link'][subject.id] = subject_row
            time.sleep(0.5)

    def link_subjects(limit):
        # Attach saved subjects to the subject set in batches.
        if len(upload_state['waiting_to_link']) > limit:
            subject_set.add(list(upload_state['waiting_to_link'].keys()))
            upload_state['waiting_to_link'].clear()

    with click.progressbar(
        subject_rows,
        length=len(subject_rows),
        label='Uploading subjects',
    ) as _subject_rows:
        try:
            with Subject.async_saves():
                for subject_row in _subject_rows:
                    count, (files, metadata) = subject_row
                    subject = Subject()
                    subject.links.project = subject_set.links.project
                    for media_file in files:
                        subject.add_location(media_file)
                    subject.metadata.update(metadata)
                    subject.save()

                    pending_subjects.append((subject, subject_row))

                    move_created(MAX_PENDING_SUBJECTS)
                    link_subjects(LINK_BATCH_SIZE)

            move_created(0)
            link_subjects(0)
        finally:
            # Anything still pending or unlinked means the upload did not
            # complete; offer to save state so it can be resumed.
            if (
                len(pending_subjects) > 0
                or len(upload_state['waiting_to_link']) > 0
            ):
                click.echo('Error: Upload failed.', err=True)
                if click.confirm(
                    'Would you like to save the upload state to resume the '
                    'upload later?',
                    default=True,
                ):
                    while True:
                        state_file_name = 'panoptes-upload-{}.yaml'.format(
                            subject_set_id,
                        )
                        state_file_name = click.prompt(
                            'Enter filename to save to',
                            default=state_file_name,
                        )

                        if not state_file_name.endswith('.yaml'):
                            click.echo(
                                'Error: File name must end in ".yaml".',
                                err=True,
                            )
                            if click.confirm(
                                'Save to {}.yaml?'.format(state_file_name),
                                default=True,
                            ):
                                state_file_name += '.yaml'
                            else:
                                continue
                        if not is_valid_filename(state_file_name):
                            click.echo(
                                'Error: {} is not a valid file name'.format(
                                    state_file_name,
                                ),
                                err=True,
                            )
                            sanitized_filename = sanitize_filename(
                                state_file_name,
                            )
                            if click.confirm(
                                'Save to {}?'.format(
                                    sanitized_filename,
                                ),
                                default=True,
                            ):
                                state_file_name = sanitized_filename
                            else:
                                continue
                        if os.path.exists(state_file_name):
                            if not click.confirm(
                                'File {} already exists. Overwrite?'.format(
                                    state_file_name,
                                ),
                                default=False,
                            ):
                                continue
                        break
                    with open(state_file_name, 'w') as state_file:
                        yaml.dump(upload_state, state_file)
def upload_chunks(self, destination, project_slug, set_prefix, zooniverse_login, zooniverse_pwd, batches=0, **kwargs):
    """
    Upload extracted audio chunks to a Zooniverse project, one subject set
    per batch.

    Chunk metadata is read from <destination>/chunks.csv; batches whose
    chunks are all flagged as uploaded are skipped. After each batch the
    bookkeeping (zooniverse id, subject set name, uploaded flag) is written
    back to chunks.csv, so an interrupted run can be resumed. When
    batches > 0 the upload stops after that many batches.
    """
    self.destination = destination

    metadata_location = os.path.join(self.destination, 'chunks.csv')
    try:
        self.chunks = pd.read_csv(metadata_location, index_col='index')
    except Exception as e:
        # Chain the underlying error instead of discarding it with a bare
        # except clause.
        raise Exception(
            "cannot read chunk metadata in {}. Check the --destination parameter, and make sure you have extracted chunks before."
            .format(metadata_location)) from e

    Panoptes.connect(username=zooniverse_login, password=zooniverse_pwd)
    zooniverse_project = Project.find(slug=project_slug)

    uploaded = 0
    for batch, chunks in self.chunks.groupby('batch'):
        # Skip batches that are already fully uploaded.
        if chunks['uploaded'].all():
            continue

        subjects_metadata = []
        subject_set = SubjectSet()
        subject_set.links.project = zooniverse_project
        subject_set.display_name = "{}_batch_{}".format(set_prefix, batch)
        subject_set.save()
        subjects = []

        _chunks = chunks.to_dict(orient='index')
        for chunk_index in _chunks:
            chunk = _chunks[chunk_index]
            print("uploading chunk {} ({},{}) in batch {}".format(
                chunk['recording'], chunk['onset'], chunk['offset'], batch))

            subject = Subject()
            subject.links.project = zooniverse_project
            subject.add_location(
                os.path.join(self.destination, 'chunks', chunk['mp3']))
            subject.metadata['date_extracted'] = chunk['date_extracted']
            subject.save()
            subjects.append(subject)

            # Record upload bookkeeping for this chunk.
            chunk['index'] = chunk_index
            chunk['zooniverse_id'] = subject.id
            chunk['project_slug'] = project_slug
            chunk['subject_set'] = str(subject_set.display_name)
            chunk['uploaded'] = True
            subjects_metadata.append(chunk)

        subject_set.add(subjects)

        # Persist progress after every batch so a crash loses at most one
        # batch of bookkeeping.
        self.chunks.update(
            pd.DataFrame(subjects_metadata).set_index('index'))
        self.chunks.to_csv(os.path.join(self.destination, 'chunks.csv'))
        uploaded += 1

        if batches > 0 and uploaded >= batches:
            return
# NOTE(review): this is Python 2 code (print statements).
# Abort early when no spectrogram PNGs are waiting for upload.
if len(files) == 0:
    raise Exception('Error finding PNG files. Did you specify correct station? ('+BASEDIR+'ZOO/'+station+'/*.png)')
# Station-level display settings stored alongside the images, one per line.
metadata = open(BASEDIR+station+'.zoo','r')
(fft,overlap,color_min,color_max) = metadata.readlines()
#Create uploaded directory if necessary
dest = BASEDIR+'ZOO/'+station+'/uploaded/'
if not(os.path.isdir(dest)):
    os.mkdir(dest)
for file in files:
    print "Uploading file %s" % file
    sys.stdout.flush()
    subject = Subject()
    subject.links.project = project
    subject.add_location(file)
    # You can set whatever metadata you want, or none at all
    subject.metadata['filename'] = os.path.basename(file)
    #TODO subject.metadata['file_start'] =
    #TODO subject.metadata['sample_rate'] = 5512
    subject.metadata['fft'] = fft
    subject.metadata['overlap'] = overlap
    subject.metadata['color_min'] = color_min
    subject.metadata['color_max'] = color_max
    #TODO subject.metadata['width'] =
    #TODO subject.metadata['height'] =
    subject.save()
    subjects.append(subject)
    os.rename(file,dest+os.path.basename(file)) #move file to uploaded directory
#Create a new subject set or append the subjects to an existing one
# NOTE(review): this loop's body continues beyond the excerpt.
for subject_set in project.links.subject_sets:
project = Project.find(slug='pmason/fossiltrainer') # modify subject set name as needed: set_name = 'test_url' # This section sets up a subject set try: # check if the subject set already exits subject_set = SubjectSet.where(project_id=project.id, display_name=set_name).next() except StopIteration: # create a new subject set for the new data and link it to the project above subject_set = SubjectSet() subject_set.links.project = project subject_set.display_name = set_name subject_set.save() # This section adds subjects from a manifest to the above subject set with open(manifest_file, 'r') as mani_file: r = csv.DictReader(mani_file) for line in r: subject = Subject() subject.links.project = project # modify the next three lines with the appropriate column headers from the manifest file subject.add_location({'image/jpeg': line['link']}) subject.metadata['subject_id'] = line['subject_id'] subject.metadata['image_name'] = line['image_name'] subject.save() subject_set.add(subject.id)
except StopIteration:
    # create a new subject set for the new data and link it to the project above
    subject_set_new = SubjectSet()
    subject_set_new.links.project = proj
    subject_set_new.display_name = new_set_name
    subject_set_new.save()
# iterate through the subjects duplicating them and verifying they are created.
k = 0
for old_sub in add_subjects:
    old_subject = Subject(old_sub)
    try:
        new_subject = Subject()
        new_subject.links.project = proj
        # Copy every media location and the metadata wholesale.
        for loc in old_subject.locations:
            new_subject.add_location(loc)
        new_subject.metadata = old_subject.metadata
        new_subject.save()
        subject_set_new.add(new_subject)
        print(new_subject.id, 'duplicated in new set to new set')
        k += 1
    except panoptes_client.panoptes.PanoptesAPIException:
        print(old_sub, 'did not duplicate correctly', str(sys.exc_info()[1]))
print(k, ' subjects linked to subject set ', new_set_name, ' in project ', proj_id)
linked = 0
with open(os.getcwd() + os.sep + 'duplicated_subjects.csv', 'wt', newline='', encoding='utf-8') as file:
    fieldnames = ['subject_id', 'Metadata', 'Locations']
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    # NOTE(review): processing of this query continues beyond the excerpt.
    subject_set = SubjectSet.where(project_id=proj_id,
                                   display_name=new_set_name).next()
# Build subjects from the discovered files, logging and quarantining any
# that fail to upload.
new_subjects = []
for img in images:
    try:
        s = Subject()
        s.links.project = project
        # manifest file
        if os.path.splitext(img)[1] == ".csv":
            # upload manifest info.... not sure how this will be set up after second step
            # move csv to complete images folder
            shutil.copy(f, completed_images)
            # make dict out of csv file for upload
            # NOTE(review): csv.DictReader is an iterator of row dicts, not
            # a mapping -- passing it to metadata.update() looks wrong;
            # verify the intended behavior.
            manifest = csv.DictReader(open(img))
            s.metadata.update(manifest)
        else:
            # upload image to subject
            s.add_location(img)
        s.save()
        new_subjects.append(s)
        image_count+=1
    except Exception as e:
        f = open(logfile, "a")
        t = time.localtime()
        # move error files into seperate folder
        os.rename(img, errorfiles + os.path.basename(os.path.normpath(img)))
        f.write('Unable to upload ' + img + ': ' + str(e) + ' '+time.strftime("%D:%H:%M:%S", t)+'\n\n')
        f.close()
try:
    # add subjects to subject set
    subject_set.save()
    # NOTE(review): this try block continues beyond the excerpt.
    subject_set.add(new_subjects)
if retry.lower() == 'n': quit() # create a new subject set for the new data and link it to the project above subject_set = SubjectSet() subject_set.links.project = project subject_set.display_name = set_name subject_set.save() print('Uploading subjects, this could take a while!') new_subjects = 0 for filename, metadata in subject_metadata.items(): try: if filename not in previous_subjects: subject = Subject() subject.links.project = project subject.add_location(compress(location, filename, 960)) subject.metadata.update(metadata) subject.save() subject_set.add(subject.id) new_subjects += 1 except panoptes_client.panoptes.PanoptesAPIException: print('An error occurred during the upload of ', filename) print(new_subjects, 'new subjects created and uploaded') print( 'Uploading complete, Please wait while the full subject listing is prepared and saved in' ) print('"Uploaded subjects.csv" in the drive with the original images') uploaded = 0 with open(location + os.sep + 'Uploaded subjects.csv', 'wt') as file_up: file_up.write('subject.id' + ',' + 'Filename' + '\n')
        # (tail of transform_item_segments, whose start is above this excerpt)
        }
        segments.append(segment)
    print('Item segments transformation complete.')
    return segments


# Fetch and transform the Library of Congress item into uploadable segments.
segments = transform_item_segments('https://www.loc.gov/item/' + LIBRARY_OF_CONGRESS_ITEM_ID)
Panoptes.connect(username=USERNAME, password=PASSWORD, endpoint=ENDPOINT)
project = Project.find(PROJECT)
subject_set = SubjectSet()
subject_set.links.project = project
subject_set.display_name = segments[0]['metadata']['Title']  # uses item Title as default subject set name, or feel free to hardcode
subject_set.save()
print('Begin Zooniverse subject upload...')
# One subject per segment, carrying its media location and metadata.
for segment in segments:
    subject = Subject()
    subject.links.project = project
    subject.add_location(segment['location'])
    subject.metadata.update(segment['metadata'])
    subject.save()
    subject_set.add(subject)
print("Zooniverse subject upload complete.")
def upload_chunks(self, chunks: str, project_id: int, set_name: str,
                  zooniverse_login="", zooniverse_pwd="", amount: int = 1000,
                  ignore_errors: bool = False, **kwargs):
    """Uploads up to ``amount`` audio chunks from the CSV dataframe `chunks` to a zooniverse project.

    :param chunks: path to the chunk CSV dataframe
    :type chunks: str
    :param project_id: zooniverse project id
    :type project_id: int
    :param set_name: name of the subject set
    :type set_name: str
    :param zooniverse_login: zooniverse login. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_LOGIN`` instead, defaults to ''
    :type zooniverse_login: str, optional
    :param zooniverse_pwd: zooniverse password. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_PWD`` instead, defaults to ''
    :type zooniverse_pwd: str, optional
    :param amount: maximum amount of chunks to upload, defaults to 1000
    :type amount: int, optional
    :param ignore_errors: if True, continue with the remaining chunks when a
        subject fails to save instead of halting the upload, defaults to False
    :type ignore_errors: bool, optional
    """
    self.chunks_file = chunks
    self.get_credentials(zooniverse_login, zooniverse_pwd)

    metadata_location = os.path.join(self.chunks_file)
    try:
        self.chunks = pd.read_csv(metadata_location, index_col="index")
    except Exception as e:
        # chain the original exception so the root cause is not silently lost
        raise Exception("cannot read chunk metadata from {}.".format(
            metadata_location)) from e

    assert_dataframe("chunks", self.chunks)
    assert_columns_presence(
        "chunks",
        self.chunks,
        {"recording_filename", "onset", "offset", "uploaded", "mp3"},
    )

    from panoptes_client import Panoptes, Project, Subject, SubjectSet

    Panoptes.connect(username=self.zooniverse_login,
                     password=self.zooniverse_pwd)
    zooniverse_project = Project(project_id)

    subjects_metadata = []
    uploaded = 0

    # reuse the subject set if one with this display name already exists,
    # otherwise create a new one linked to the project
    subject_set = None
    for ss in zooniverse_project.links.subject_sets:
        if ss.display_name == set_name:
            subject_set = ss

    if subject_set is None:
        subject_set = SubjectSet()
        subject_set.links.project = zooniverse_project
        subject_set.display_name = set_name
        subject_set.save()

    subjects = []

    # only consider chunks that have not been uploaded yet
    chunks_to_upload = self.chunks[self.chunks["uploaded"] == False].head(
        amount)
    chunks_to_upload = chunks_to_upload.to_dict(orient="index")

    if len(chunks_to_upload) == 0:
        print("nothing left to upload.")
        return

    for chunk_index in chunks_to_upload:
        chunk = chunks_to_upload[chunk_index]

        print("uploading chunk {} ({},{})".format(
            chunk["recording_filename"], chunk["onset"], chunk["offset"]))

        subject = Subject()
        subject.links.project = zooniverse_project
        subject.add_location(
            os.path.join(os.path.dirname(self.chunks_file), "chunks",
                         chunk["mp3"]))
        subject.metadata["date_extracted"] = chunk["date_extracted"]

        try:
            subject.save()
        except Exception as e:
            print("failed to save chunk {}. an exception has occured:\n{}".
                  format(chunk_index, str(e)))
            print(traceback.format_exc())

            # bugfix: was `args.ignore_errors`, but `args` is not defined in
            # this scope — the method parameter is `ignore_errors`
            if ignore_errors:
                continue
            else:
                print("subject upload halting here.")
                break

        subjects.append(subject)

        # record the zooniverse identifiers so the chunk is marked uploaded
        # in the CSV and not re-sent on the next run
        chunk["index"] = chunk_index
        chunk["zooniverse_id"] = str(subject.id)
        chunk["project_id"] = str(project_id)
        chunk["subject_set"] = str(subject_set.display_name)
        chunk["uploaded"] = True
        subjects_metadata.append(chunk)

    if len(subjects) == 0:
        return

    subject_set.add(subjects)

    # persist upload state back to the chunk CSV
    self.chunks.update(pd.DataFrame(subjects_metadata).set_index("index"))
    self.chunks.to_csv(self.chunks_file)
# Pad the per-subject group sizes: the remaining (groups - eights) subjects
# each hold 7 images. NOTE(review): `eights`, `groups`, `group_list`, `seq`,
# `location`, `set_name`, `project`, `subject_set`, `new_subjects`,
# `images_uploaded` and `compress` are all defined outside this fragment.
for i in range(eights, groups):
    group_list.append(7)
# k walks through `seq` (pairs of filename, date_time) across all groups
k = 0
for index in range(0, len(group_list)):
    date_times = ''
    files = ''
    try:
        subject = Subject()
        new_subjects += 1
        subject.links.project = project
        # attach group_list[index] compressed images to this single subject
        for j in range(0, group_list[index]):
            compressed_file = compress(
                location + os.sep + seq[k][0], 900000,
                r'C:\py\image_manipulation\temp_file.jpg')
            subject.add_location(compressed_file)
            # accumulate comma-separated filename / timestamp lists
            files += seq[k][0] + ', '
            date_times += seq[k][1] + ', '
            images_uploaded += 1
            k += 1
        # [:-2] drops the trailing ", " from the accumulated strings
        subject.metadata['File_group'] = files[:-2]
        subject.metadata['Site_Date'] = set_name
        subject.metadata['Date_times'] = date_times[:-2]
        print('Uploading group, this could take a while!')
        subject.save()
        subject_set.add(subject.id)
        print(new_subjects, subject.metadata['File_group'],
              subject.metadata['Date_times'])
    except panoptes_client.panoptes.PanoptesAPIException:
        # best-effort: report the failed group and carry on with the next one
        print('An error occurred during the upload of ', files)
print(images_uploaded, 'images uploaded into', new_subjects, 'subjects')
#(img) C:/Users/Rdebbout/Downloads/vids_DUL>ffmpeg -i test_out4.mp4 -b 1397520 bit_down2.mp4 # resolution #(img) C:/Users/Rdebbout/Downloads/vids_DUL>ffmpeg -i test_out4.mp4 -vf scale=960:540 bit_down_scale.mp4 # ffprobe -v quiet -print_format json -show_format -show_streams test_out4.mp4 > op.json ################################################################################ from panoptes_client import SubjectSet, Subject, Project, Panoptes Panoptes.connect(username='******', password='******') project = Project.find(id = 5483) subject_set = SubjectSet.find(17639) subject = Subject() subject.links.project = project subject.add_location({'video/mp4': ('C:/Users/Rdebbout/Downloads/vids_DUL/' 'test_frame_rate/duo_DVR150925_1432_001clip.mp4')}) subject.metadata['site_id'] = 'NCCAGL10-1047' subject.save() subject_set.add(subject) ################################################################################ here = 'C:/Users/Rdebbout/Downloads/vids_DUL/test_frame_rate/prepare_ye' tbl_list = pd.read_csv('CitSci_VideoList_beta.csv') for f in os.listdir(here): print f subprocess.call('ffmpeg -i {0} -vf scale=960:540 {1}_test.mpeg'.format(f,f.split('.')[0])) ################################################################################
def create_subjects_and_link_to_project(proto_subjects, project_id,
                                        subject_set_id, subject_set_name=None):
    '''
    find the project and relevant subject set. Get the existing subject data
    and compare to the new proto_subjects. Upload any instances of new
    subjects to the project

    Keyword Arguments:
    proto_subjects   -- dictionary mapping subject filepath+filename to its
                        associated metadata dict
    project_id       -- identifier to find and link with the project
    subject_set_id   -- identifier for the subject set of interest; when None
                        a new subject set is created
    subject_set_name -- optional display name; when adding to an existing set
                        it must match that set's name

    Returns None on success, -1 when subject_set_name conflicts with the
    existing set's display name.
    '''
    # get the project object
    project = Project.find(project_id)

    # set up subject_set
    if subject_set_id is None:
        subject_set = SubjectSet()  # create empty subject_set
        subject_set.links.project = project
        if subject_set_name is None:
            # if not defined generate a random subject set name to avoid an
            # error when a set with the same name already exists
            subject_set_name = 'subject_set_{:02d}_{:02d}_{:04d}_{}'.format(
                date.day, date.month, date.year, ''.join(generate_random_str()))
            print("will create a subject set called: {}".format(subject_set_name))
        subject_set.display_name = subject_set_name  # set the name of the subject set
        subject_set.save()
        project.reload()
    else:
        # bugfix: find() is a class-level lookup — no need to instantiate
        # a throwaway SubjectSet() first
        subject_set = SubjectSet.find(subject_set_id)  # find the existing subject_set
        existing_subject_set_name = subject_set.display_name  # get its name
        # if you have tried to set the subject set name, check that it matches
        # the name for the chosen subject set id
        if (subject_set_name is not None) and (existing_subject_set_name != subject_set_name):
            print(
                "your chosen subject set name does not match the existing name: {}, {}"
                .format(subject_set_name, existing_subject_set_name))
            return -1
        else:
            subject_set_name = existing_subject_set_name
            print("add to existing subject set: {}".format(subject_set_name))

    # Create a list of the existing subject metadata
    meta_list = []
    print("existing subjects:")
    for subject in subject_set.subjects:
        print(subject.id, subject.metadata)
        meta_list.append(subject.metadata)

    # When making the list of subjects to add, check to see if the metadata of
    # the subject you want to add is already in the set
    print("new subjects:")
    new_subjects = []
    for filename, metadata in proto_subjects.items():
        # bugfix: was `np.isin(metadata, meta_list)` — np.isin is an
        # element-wise array operation and is not a reliable membership test
        # for a dict against a list of dicts; plain `in` compares by equality
        if metadata in meta_list:
            print("{}, subject already in set".format(metadata))
            # In this case we skip over the subject that already exists.
            # N.B. you may want to remove an existing subject and update it
            # with the new one
            continue
        # Otherwise we can add the subject to the new subject list
        else:
            subject = Subject()
            subject.links.project = project
            subject.add_location(filename)
            subject.metadata.update(metadata)
            subject.save()
            new_subjects.append(subject)
            print("{}, new subject add to list".format(metadata))

    print("new subjects to add: {}".format(new_subjects))
    # add the new subject list (data and metadata) to the already defined
    # project subject set
    subject_set.add(new_subjects)
    return
if np.isnan(row['t01_smooth_or_features_a02_features_or_disk_weighted_fraction']): pbar = 'NaN' pspiral = 'NaN' dr8id = 'NaN' dr7id = 'NaN' specid = 'NaN' else: pbar = row['t01_smooth_or_features_a02_features_or_disk_weighted_fraction']*row['t02_edgeon_a05_no_weighted_fraction']*row['t03_bar_a06_bar_weighted_fraction'] pspiral = row['t01_smooth_or_features_a02_features_or_disk_weighted_fraction']*row['t02_edgeon_a05_no_weighted_fraction']*row['t04_spiral_a08_spiral_weighted_fraction'] dr8id = row['dr8objid'] dr7id = row['dr7objid'] specid = row['specobjid'] summer += 1 subject = Subject() subject.links.project = project subject.add_location('./manga_mpl4_cutouts/cutouts/{0}.jpg'.format(row['MANGAID'].decode('utf-8'))) subject.metadata['RA'] = row['RA'] subject.metadata['DEC'] = row['DEC'] subject.metadata['MANGAID'] = row['MANGAID'].decode('utf-8') subject.metadata['Z'] = row['Z'] subject.metadata['PETROTH50'] = row['PETROTH50'] subject.metadata['#MANGA_TILEID'] = row['MANGA_TILEID'] subject.metadata['#NSAID'] = row['NSAID'] subject.metadata['#SERSIC_TH50'] = row['SERSIC_TH50'] subject.metadata['#P(Bar)'] = pbar subject.metadata['#P(Spiral)'] = pspiral subject.metadata['#specobjid'] = specid subject.metadata['#dr8objid'] = dr8id subject.metadata['#dr7objid'] = dr7id try: subject.save()