def push_new_row_subjects(self, source_subject, target_subject_set_id, row_paths_by_column):
        """
        Create and upload unclassified row subjects for segmented row images.

        row_paths_by_column maps a column index to the list of row image paths
        produced for that column. Each new subject carries metadata linking it
        back to the source document subject and the column it came from.
        """
        project = Project.find(settings.PROJECT_ID)
        target_set = SubjectSet.find(target_subject_set_id)

        created_subjects = []
        for column_index, row_paths in row_paths_by_column.items():
            self._logger.info('Creating %d new row subjects for column index %d for subject %s',
                              len(row_paths), column_index, source_subject.id)
            for row_path in row_paths:
                subject = Subject()
                subject.links.project = project
                # Carry identifying fields over from the source document subject.
                for field in ('book', 'page'):
                    subject.metadata[field] = source_subject.metadata[field]
                subject.metadata['source_document_subject_id'] = source_subject.id
                subject.metadata['source_document_column_index'] = column_index
                subject.add_location(row_path)
                subject.save()
                created_subjects.append(subject)

        target_set.add(created_subjects)
예제 #2
0
    def link_new_set(self, subject_set_id):
        """
        Link an existing subject set to a new workflow on this project.

        :param subject_set_id: ID of the subject set to link
        :return: None
        """
        workflow = Workflow()
        workflow.links.project = self.project
        # BUG FIX: the original looked the set up via a Subject instance
        # (Subject.find returns a Subject, not a SubjectSet) and left
        # debugging print(1)/print(2) statements behind.
        subject_set = SubjectSet.find(subject_set_id)
        # BUG FIX: links.sub() is not part of the panoptes client API; the
        # intent is to attach the set to the workflow's subject_sets link.
        workflow.links.subject_sets.add(subject_set)
예제 #3
0
    def run(self):
        """
        Migrate segmented railroad rows.

        Looks up the target subject set for every segmented row subject and
        adds each subject to its target set.

        NOTE(review): removals_by_target_set is populated below but never
        consumed, so subjects are added to their new sets without being
        removed from their current ones — confirm whether removal was
        intended.
        """
        subjects_and_their_target_sets = self._calculate_target_subject_sets_by_subject(
        )
        segmented_rows_and_their_target_sets = self \
            ._segmented_row_target_sets(subjects_and_their_target_sets)
        additions_by_target_set = defaultdict(list)
        removals_by_target_set = defaultdict(list)

        for subject_id, target_subject_set_id in segmented_rows_and_their_target_sets.items(
        ):
            # target_subject_set = self._get_subject_set(target_subject_set_id)
            self._logger.debug('Saving segmented row %d to set: %s',
                               subject_id, target_subject_set_id)
            subject = Subject.find(subject_id)
            additions_by_target_set[target_subject_set_id].append(subject)

            # Record the sets the subject currently belongs to (unused; see
            # NOTE in the docstring).
            for curr_subject_set in subject.links.subject_sets:
                removals_by_target_set[curr_subject_set.id].append(subject_id)

        # Add subjects to their appropriate target sets
        for target_subject_set_id, new_subjects in additions_by_target_set.items(
        ):
            target_subject_set = self._get_subject_set(target_subject_set_id)
            target_subject_set.add(new_subjects)
예제 #4
0
def uploadSubjectToSet(project, subjectSet, locationsList, metadataList):
    """
    Upload model-comparison subjects (JSON model data + galaxy image) to a
    subject set.

    Args:
        project: panoptes Project to link the subjects to
        subjectSet: SubjectSet to add the created subjects to
        locationsList: per-subject [image, model-comparison-json, model-json]
        metadataList: per-subject metadata dicts; must match locationsList length
    Returns:
        The subject set, or None if the argument lengths differ.
    """
    print('Uploading {} subjects to {}'.format(len(locationsList), subjectSet))
    # imagePath can be string or list, metadata must be same dimension
    if not len(locationsList) == len(metadataList):
        print(
            '\t\033[31mInvalid arguments, locationsList and metadataList',
            'must have same length\033[0m'
        )
        return
    subjects = []
    for locations, meta in tqdm(zip(locationsList, metadataList)):
        # the json subjects need to be added in a more manual way so we can
        # specify a MIME type
        subjects.append(Subject())
        subjects[-1].links.project = project
        # comparison between model and image
        addLocation(subjects[-1], {'application/json': locations[1]})
        # actual galaxy image
        subjects[-1].add_location(locations[0])
        # and now just the model
        addLocation(subjects[-1], {'application/json': locations[2]})
        for k, v in meta.items():
            subjects[-1].metadata[k] = v
        try:
            subjects[-1].save()
        except RuntimeError as save_err:
            # BUG FIX: was `pass`, which silently swallowed failed saves and
            # still linked the unsaved subject below. Keep best-effort
            # behaviour but surface the failure.
            print('\t\033[31mFailed to save subject: {}\033[0m'.format(save_err))
    subjectSet.add(subjects)
    return subjectSet
예제 #5
0
 def _hydrate_book_and_page(cls, row):
     """
     Populate a subject's 'book' and 'page' metadata from its SubjectModel
     and save it.

     :param row: mapping with a 'subject_id' key identifying the subject
     :raises ValueError: if any BOOK_AND_PAGE_FIELDS value is still None
     """
     subject = Subject.find(row['subject_id'])
     subject_model = SubjectModel(subject)
     subject.metadata['book'] = subject_model['book']
     subject.metadata['page'] = subject_model['page']
     for field in cls.BOOK_AND_PAGE_FIELDS:
         if subject.metadata[field] is None:
             # BUG FIX: the original applied % to a single value for a
             # three-slot format string and passed the remaining values as
             # extra ValueError args, raising TypeError instead of the
             # intended message. Format all three values as a tuple.
             raise ValueError("WARN: None '%s' for subject %d and filepath %s" %
                              (field, subject.id, subject.metadata['filepath']))
     subject.save()
예제 #6
0
def ls(subject_set_id, quiet, subject_ids):
    """
    Lists subject IDs and their media URLs.
    """

    # Explicit IDs given: look each one up directly and stop.
    if subject_ids:
        for sid in subject_ids:
            found = Subject.find(sid)
            if quiet:
                click.echo(found.id)
            else:
                echo_subject(found)
        return

    # Otherwise list everything in the subject set.
    matches = Subject.where(subject_set_id=subject_set_id)
    if not quiet:
        for match in matches:
            echo_subject(match)
    else:
        click.echo(" ".join(s.id for s in matches))
예제 #7
0
def upload_subjects(subject_set_id, manifest_file):
    """
    Upload subjects listed in a CSV manifest to an existing subject set.

    Each manifest row may reference one or more local files (columns matching
    IMAGE_REGEX that exist relative to the manifest's directory); every column
    becomes subject metadata. Subjects are linked to the set in batches of
    LINK_BATCH_SIZE. Returns -1 if a row contains no usable files.
    """
    subject_set = SubjectSet.find(subject_set_id)
    subject_rows = []
    with open(manifest_file) as manifest_f:
        file_root = os.path.dirname(manifest_file)
        r = csv.reader(manifest_f)
        # BUG FIX: csv reader objects have no .next() method in Python 3;
        # use the next() builtin to read the header row.
        headers = next(r)
        for row in r:
            metadata = dict(zip(headers, row))
            files = []
            for col in row:
                file_match = re.match(IMAGE_REGEX, col)
                file_path = os.path.join(file_root, col)
                if file_match and os.path.exists(file_path):
                    files.append(file_path)
            if len(files) == 0:
                click.echo('Could not find any files in row:', err=True)
                click.echo(','.join(row), err=True)
                return -1
            subject_rows.append((files, metadata))

    created_subjects = []
    with click.progressbar(
        enumerate(subject_rows),
        length=len(subject_rows),
        label='Uploading subjects',
    ) as _subject_rows:
        for count, (files, metadata) in _subject_rows:
            subject = Subject()
            subject.links.project = subject_set.links.project
            # BUG FIX: map() is lazy in Python 3, so the original never
            # actually added any locations; iterate explicitly.
            for media_file in files:
                subject.add_location(media_file)
            subject.metadata.update(metadata)
            subject.save()
            created_subjects.append(subject)

            # Link in batches to keep the linking requests small.
            if (count + 1) % LINK_BATCH_SIZE == 0:
                subject_set.add(created_subjects)
                created_subjects = []

        if len(created_subjects) > 0:
            subject_set.add(created_subjects)
예제 #8
0
def save_subject(manifest_item, project, pbar=None):
    """
    Add manifest item to project. Note: follow with subject_set.add(subject) to associate with subject set.
    Args:
        manifest_item (dict): of form {png_loc: img.png, key_data: some_data_dict}
        project (str): project to upload subject too e.g. '5773' for Galaxy Zoo
        pbar (tqdm.tqdm): progress bar to update. If None, no bar will display.

    Returns:
        Subject: the saved subject (the original docstring incorrectly said None)
    """
    subject = Subject()

    subject.links.project = project
    # Fail with the offending path in the message rather than a bare
    # AssertionError (exception type unchanged for existing callers).
    assert os.path.exists(manifest_item['png_loc']), \
        'Missing png_loc: {}'.format(manifest_item['png_loc'])
    subject.add_location(manifest_item['png_loc'])
    subject.metadata.update(manifest_item['key_data'])

    subject.save()

    if pbar:
        pbar.update()

    return subject
예제 #9
0
    def add_new_subject(self, image_list, metadata_list, subject_set_name):
        """
        Create a new subject set and add one subject per image with its
        metadata. image_list and metadata_list must be of equal length.

        :param image_list: list of images to be added
        :param metadata_list: list of metadata to be added
        :param subject_set_name: display name for the new subject set
        :return: None
        """
        # BUG FIX: the original only printed a warning on mismatched lengths
        # and then continued, raising IndexError partway through the upload.
        # Bail out before any remote state is created.
        if len(image_list) != len(metadata_list):
            print("Image list and metadata list do not match")
            return

        # Create and link the subject set we want
        subject_set = SubjectSet()
        subject_set.links.project = self.project
        subject_set.display_name = subject_set_name
        subject_set.save()

        # Go through the image and metadata list and add the items
        new_subjects = []
        for image, metadata in zip(image_list, metadata_list):
            subject = Subject()
            subject.links.project = self.project
            subject.add_location(image)
            subject.metadata.update(metadata)
            subject.save()
            new_subjects.append(subject)

        subject_set.add(new_subjects)
예제 #10
0
def pushSubject(subjectSet, project, imageLocations, metadata, livePost):
    """
    Build, save, and link one subject with the given images and metadata.

    Returns the saved subject, or None when livePost is falsy (dry run).
    Saves are retried indefinitely on connection failures.
    """
    if not livePost:
        return None

    subject = Subject()
    subject.links.project = project
    for image in imageLocations:
        subject.add_location(image)
    subject.metadata.update(metadata)

    # Keep retrying the save until it goes through.
    while True:
        try:
            subject.save()
        except ConnectionError as err:
            print('{} , TRYING AGAIN'.format(err))
        else:
            break

    subjectSet.add(subject)
    return subject
def make_tutorial_images(imagePaths, ellipseData, projectData):
    """
    Upload one subject per tutorial image to the configured subject set.

    Args:
        imagePaths: iterable of image file paths, indexed by position
        ellipseData: pandas GroupBy keyed by image index; passed to make_metadata
        projectData: dict with 'user_name', 'password', 'subject_set' keys
    """
    # Connect to Panoptes
    Panoptes.connect(
        username=projectData["user_name"], password=projectData["password"]
    )

    # BUG FIX: the original re-fetched the subject set on every loop
    # iteration and raised NameError at the final subjectSet.add() when
    # imagePaths was empty. Resolve it once, up front.
    try:
        subjectSet = SubjectSet.find(projectData["subject_set"])
    except PanoptesAPIException as e:
        print(e)
        return

    newSubjects = []
    for imageId, imagePath in enumerate(imagePaths):
        print(f"Adding {imagePath}...")
        newSubject = Subject()
        newSubject.add_location(imagePath)
        newSubject.links.project = subjectSet.links.project
        newSubject.metadata.update(
            make_metadata(
                ellipseData.get_group(imageId).reset_index(drop=True), imagePath
            )
        )
        newSubject.save()
        newSubjects.append(newSubject)
    subjectSet.add(newSubjects)
예제 #12
0
def create_subject(project, metadata, media_files):
    """Build, populate, and save a single subject linked to *project*."""
    new_subject = Subject()
    new_subject.links.project = project
    for media_path in media_files:
        new_subject.add_location(media_path)
    new_subject.metadata.update(metadata)
    new_subject.save()
    return new_subject
예제 #13
0
    def _create_subject(self, project_id, filename, metadata=None):
        """Create and save a subject for *filename* under the given project.

        Metadata is attached only when a truthy mapping is supplied.
        """
        new_subject = Subject()
        new_subject.links.project = Project.find(project_id)
        new_subject.add_location(filename)
        if metadata:
            new_subject.metadata.update(metadata)
        new_subject.save()
        return new_subject
 def queue_new_subject_creation(cls, subject_id, vertex_centroids, target_subject_set_id):
     """
     Given subject ID and vertex centroids, fetch subject image and perform segmentation.
     Static-w/-instance-of-self pattern to support enqueuing in RQ.
     """
     logger = setup_logger(cls.LOGGER_NAME, 'log/queue_operations.log')
     queue_ops = QueueOperations(logger)
     subject = Subject.find(subject_id)
     # Download the subject's image to a local temp path for the segmenters.
     subject_image_path = queue_ops.fetch_subject_image_to_tmp(subject)
     # Split the page image into column images at the classified centroids.
     column_image_paths = queue_ops.perform_column_segmentation(
         subject_id,
         subject_image_path,
         vertex_centroids
     )
     # Upscale any column images that are too small before row segmentation.
     for column_image_path in column_image_paths:
         queue_ops.upscale_small_images(column_image_path)
     row_paths_by_column = queue_ops.perform_row_segmentation(column_image_paths)
     # Upload one new subject per segmented row to the target subject set.
     queue_ops.push_new_row_subjects(subject, target_subject_set_id, row_paths_by_column)
def create_subject(project, media_files, metadata):
    """ Create a subject
        Args:
        - project: a Project() object defining the Zooniverse project
        - media_files: a list of media files to link to the subject
        - metadata: a dictionary with metadata to attach
    """
    new_subject = Subject()
    new_subject.links.project = project
    for media_path in media_files:
        new_subject.add_location(media_path)
    new_subject.metadata.update(metadata)
    new_subject.save()
    return new_subject
예제 #16
0
def upload_subject(locations: List, project: Project, subject_set_name: str,
                   metadata: Dict):
    """
    Create a subject from the given media locations and metadata, then add it
    to the named subject set (created if it does not yet exist).

    Args:
        locations: list of local media file paths
        project: panoptes Project to link the subject to
        subject_set_name: display name of the target subject set
        metadata: metadata dict to attach to the subject
    Raises:
        FileNotFoundError: if any location is not an existing file.
    Returns:
        The new subject's id.
    """
    subject = Subject()
    # add files
    subject.links.project = project
    for location in locations:
        if not os.path.isfile(location):
            raise FileNotFoundError(
                'Missing subject location: {}'.format(location))
        subject.add_location(location)

    subject.metadata.update(metadata)

    # (Removed a no-op self-assignment of subject_set_name.)
    subject_set = get_or_create_subject_set(project.id, subject_set_name)

    subject.save()
    subject_set.add(subject)
    return subject.id
예제 #17
0
def upload_images(id, use_database=True):
    """
    Create a subject set named *id* and upload its manifest images to the
    radio-galaxy-zoo-lofar project, then attach the set to workflow 11973.

    NOTE(review): `id` shadows the builtin; the Panoptes password comes from
    the PANOPTES_PASSWORD environment variable; os.chdir changes the process
    working directory and is never restored (wd is saved but unused).

    :param id: field/directory name under `target` holding *-manifest.txt files
    :param use_database: when True, record progress via update_status()
    """
    print('Create subject set and upload images for', id)
    if use_database:
        update_status(id, gz_status='Uploading')
    wd = os.getcwd()  # NOTE(review): saved but never used/restored
    Panoptes.connect(username='******',
                     password=os.environ['PANOPTES_PASSWORD'])
    os.chdir(target + id)
    project = Project.find(slug='chrismrp/radio-galaxy-zoo-lofar')
    subject_set = SubjectSet()

    subject_set.display_name = id
    subject_set.links.project = project
    subject_set.save()
    print('Made subject set')
    new_subjects = []
    g = glob.glob('*-manifest.txt')
    for i, f in enumerate(g):
        # First line of each manifest is comma-separated; indices used below:
        # 0=subject_id, 1..3=image locations, 4=source_name, 5=ra, 6=dec, 7=size
        bits = open(f).readlines()[0].split(',')
        metadata = {
            'subject_id': int(bits[0]),
            'ra': float(bits[5]),
            'dec': float(bits[6]),
            '#size': float(bits[7]),
            'source_name': bits[4]
        }
        print('Upload doing', bits[4], '%i/%i' % (i, len(g)))
        subject = Subject()
        subject.links.project = project
        subject.metadata.update(metadata)
        for location in bits[1:4]:
            subject.add_location(location)
        subject.save()
        new_subjects.append(subject)

    subject_set.add(new_subjects)

    # Attach the new set to the (hard-coded) classification workflow.
    workflow = Workflow(11973)
    workflow.links.subject_sets.add(subject_set)
    if use_database:
        update_status(id, gz_status='In progress')
    print('Done!')
예제 #18
0
    def create_subjects_and_link_to_project(self, proto_subjects, project_id,
                                            workflow_id, subject_set_id):
        """
        Create subjects from proto_subjects and link them to a workflow.

        Each proto subject supplies two media locations ('location_lc',
        'location_ps') and a 'metadata' dict. When subject_set_id is None a
        new subject set named with the current UTC timestamp is created;
        otherwise the existing set is used. Exceptions are logged, not
        raised (boundary method).
        """
        try:
            USERNAME = os.getenv('PANOPTES_USERNAME')
            PASSWORD = os.getenv('PANOPTES_PASSWORD')
            Panoptes.connect(username=USERNAME,
                             password=PASSWORD,
                             endpoint=self.ENDPOINT)

            project = Project.find(project_id)
            # Use the class-level find; no need for a throwaway instance.
            workflow = Workflow.find(workflow_id)

            # Idiom fix: compare to None with `is`, not `==`.
            if subject_set_id is None:
                subject_set = SubjectSet()
                ts = time.gmtime()
                subject_set.display_name = time.strftime(
                    "%m-%d-%Y %H:%M:%S", ts)
                subject_set.links.project = project

                subject_set.save()
            else:
                subject_set = SubjectSet.find(subject_set_id)
            subjects = []
            for proto_subject in proto_subjects:
                subject = Subject()
                subject.links.project = project
                subject.add_location(proto_subject['location_lc'])
                subject.add_location(proto_subject['location_ps'])
                subject.metadata.update(proto_subject['metadata'])
                subject.save()
                subjects.append(subject)

            subject_set.add(subjects)
            workflow.add_subject_sets(subject_set)
        except Exception:
            self.log.exception("Error in create_subjects_and_link_to_project ")
예제 #19
0
def run():
    """
    Query for completed subjects, calculate kmeans vertex centroids, fetch subject images, split
    columns by centroids, row segmentation with Ocropy.
    """

    logger = setup_logger(settings.APP_NAME,
                          'log/kmeans_and_enqueue_completed_subjects.log',
                          logging.DEBUG)

    subject_set_csv = SubjectSetCSV()
    workflow_router = SubjectSetWorkflowRouter(subject_set_csv, settings,
                                               logger)
    pages_raw_subject_ids = subject_set_csv.raw_pages_subject_ids()
    logger.debug("Running Wires and Rails Workflow Processor")
    Panoptes.connect(username=settings.PANOPTES_USERNAME,
                     password=settings.PANOPTES_PASSWORD)

    retired_subject_ids = []

    vertices_and_target_subject_sets = []

    for _subject_set_id, metadata in settings.COLUMNS_WORKFLOW_METADATA.items(
    ):

        # %(...)s mapping-style placeholders are filled from the metadata
        # dict passed as the logging argument.
        logger.debug("Loading vertices / subject retirement info for %(debug_name)s subject set " \
            "(subject set id: %(subject_set_id)d; workflow id: %(workflow_id)d; task id: " \
            " %(task_id)s", metadata)

        classification_kwargs = {
            'scope': 'project',
            'project_id': settings.PROJECT_ID,
            'workflow_id': metadata['workflow_id']
        }
        logger.debug("Loading classifications by params %s",
                     str(classification_kwargs))
        classifications_records = [
            c for c in Classification.where(**classification_kwargs)
        ]

        classifications = VertexClassifications(classifications_records,
                                                pages_raw_subject_ids)

        # Aggregate vertex centroids
        centroids_by_subject = classifications.vertex_centroids(
            metadata['task_id'])
        for subject_id, centroids in centroids_by_subject.items():
            # Find target subject set ID, or log and skip the subject
            try:
                target_subject_set_id = workflow_router \
                    .target_subject_set_id(subject_id, classifications_records)
            except UnidentifiedRawSubjectSetException as ex:
                logger.error(ex.args[0])
                continue
            except SharedMajorityException as ex:
                # TODO need add'l monitoring for this, e.g. manual report exception
                logger.error(ex.args[0])
                continue
            vertices_and_target_subject_sets.append(
                [subject_id, centroids, target_subject_set_id])

        # Aggregate retired subjects
        workflow = Workflow.find(metadata['workflow_id'])
        retirement_count = workflow.retirement['options']['count']
        retired_subject_ids += classifications.retired_subject_ids(
            metadata['task_id'], retirement_count)

    logger.debug(
        'Retrieved the following subject centroids for image segmentation: %s',
        str(vertices_and_target_subject_sets))

    logger.debug('For the following retired subject IDs: %s',
                 str(retired_subject_ids))

    queue = Queue(connection=Redis(host=settings.REDIS_HOST))

    for subject_id, centroids, target_subject_set_id in vertices_and_target_subject_sets:
        # Only enqueue subjects that the workflow has actually retired.
        if subject_id not in retired_subject_ids:
            continue
        subject = Subject.find(subject_id)
        # Skip subjects flagged as already processed by a previous run.
        if settings.METADATA_KEY_ALREADY_PROCESSED in subject.metadata and \
           subject.metadata[settings.METADATA_KEY_ALREADY_PROCESSED]:
            logger.debug('Skipping subject id %d; already processed.',
                         subject_id)
            continue
        logger.debug('Enqueuing subjects id: %d', subject_id)
        queue.enqueue(QueueOperations.queue_new_subject_creation,
                      subject_id,
                      centroids,
                      target_subject_set_id,
                      timeout=2 * 60 * 60)
        QueueOperations.flag_subject_as_queued(subject)
예제 #20
0
    print('\n', 'It may take a while to recover the names of files previously uploaded, to ensure no duplicates')
    for subject in subject_set.subjects:
        previous_subjects.append(subject.metadata['Filename'])
except StopIteration:
    # create a new subject set for the new data and link it to the project above
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = set_name
    subject_set.save()

print('Uploading subjects, this could take a while!')
new_subjects = 0
for filename, metadata in subject_metadata.items():
    try:
        # Only upload files not already present in the subject set.
        if filename not in previous_subjects:
            subject = Subject()
            subject.links.project = project
            # compress() presumably resizes the image to 960 px before
            # upload — TODO confirm against its definition.
            subject.add_location(compress(args.image_dir, filename, 960))
            subject.metadata.update(metadata)
            subject.save()
            subject_set.add(subject.id)
            new_subjects += 1
    except panoptes_client.panoptes.PanoptesAPIException:
        # Best-effort: report the failure and continue with the next file.
        print('An error occurred during the upload of ', filename)
print(new_subjects, 'new subjects created and uploaded')
print('Uploading complete, Please wait while the full subject listing is prepared and saved in')

output_file = "uploaded_subjects.csv"

print('"%s" in the drive with the original images' % output_file)
예제 #21
0
#!/usr/bin/env python3
"""
Un-flag arbitrary subjects as not processed, useful for debugging workflow processing.
"""

import sys
sys.path.insert(0, "..")

from panoptes_client import Panoptes, Subject

from lib import settings

Panoptes.connect(username=settings.PANOPTES_USERNAME,
                 password=settings.PANOPTES_PASSWORD)

# Previously used test batches:
# SUBJECT_IDS = ['5823821', '5823822']
# SUBJECT_IDS = ['14813279', '14813280', '14813281']
# SUBJECT_IDS = ['15327062','15327056','15327068','15327065']

# Telegraph tests -
SUBJECT_IDS = ['15327068', '15327065', '15327062', '15327059', '15327056']

for subject_id in SUBJECT_IDS:
    # Clear the "already processed" flag so the workflow processor will
    # pick the subject up again on its next run.
    subject = Subject.find(subject_id)
    subject.metadata[settings.METADATA_KEY_ALREADY_PROCESSED] = False
    subject.save()
예제 #22
0
 def _create_subjects_from_epicollect5(self, project, subjects_metadata):
     """Build and save one Subject per Epicollect5 observation record."""
     # Fields copied verbatim from each observation record.
     copied_fields = ('id', 'project', 'obs_type', 'source', 'url',
                      'created_at', 'observer')
     subjects = list()
     for metadata in subjects_metadata:
         subject = Subject()
         for field in copied_fields:
             subject.metadata[field] = metadata[field]
         # Location is nested; flatten to two top-level coordinates.
         subject.metadata['longitude'] = metadata['location']['longitude']
         subject.metadata['latitude'] = metadata['location']['latitude']
         subject.metadata['comment'] = metadata['comment']
         subject.metadata['spectrum_type'] = metadata.get(
             'spectrum_type', "?")
         subject.add_location({'image/jpg': metadata['url']})
         subject.links.project = project
         subject.save()
         subjects.append(subject)
     return subjects
예제 #23
0
# Abort early when the station directory yielded no spectrogram images.
if len(files) == 0:
    raise Exception(
        'Error finding PNG files. Did you specify correct station? (' +
        BASEDIR + 'ZOO/' + station + '/*.png)')
# The .zoo sidecar file is expected to contain exactly four lines
# (fft, overlap, color_min, color_max); the tuple unpack raises
# ValueError otherwise. NOTE(review): the file handle is never closed.
metadata = open(BASEDIR + station + '.zoo', 'r')
(fft, overlap, color_min, color_max) = metadata.readlines()

#Create uploaded directory if necessary
dest = BASEDIR + 'ZOO/' + station + '/uploaded/'
if not (os.path.isdir(dest)):
    os.mkdir(dest)

for file in files:
    print "Uploading file %s" % file
    sys.stdout.flush()
    subject = Subject()
    subject.links.project = project
    subject.add_location(file)
    # You can set whatever metadata you want, or none at all
    subject.metadata['filename'] = os.path.basename(file)
    #TODO subject.metadata['file_start'] =
    #TODO subject.metadata['sample_rate'] = 5512
    subject.metadata['fft'] = fft
    subject.metadata['overlap'] = overlap
    subject.metadata['color_min'] = color_min
    subject.metadata['color_max'] = color_max
    #TODO subject.metadata['width'] =
    #TODO subject.metadata['height'] =
    subject.save()
    subjects.append(subject)
    os.rename(file,
예제 #24
0

# Load Panoptes connection settings (passed straight to Panoptes.connect).
with open('config.yaml') as config_f:
    config = yaml.load(config_f, Loader=yaml.FullLoader)

# One subject ID per line.
with open(SUBJECT_ID_FILE) as subject_id_f:
    subject_ids = [ s.strip() for s in subject_id_f.readlines() ]

Panoptes.connect(**config)

with ChargingBar(
    'Updating',
    max=len(subject_ids),
    suffix='%(percent).1f%% %(eta_td)s'
) as bar:
    with Subject.async_saves():
        for subject_id in subject_ids:
            bar.next()

            subject = Subject.find(subject_id)

            if '!CERiT' in subject.metadata:
                continue

            superwasp_id = subject.metadata.get('Filename', subject.metadata.get('filename')).split('_')[0]
            coords = superwasp_id.replace('1SWASP', '')
            coords_quoted = urllib.parse.quote(coords)
            ra = urllib.parse.quote('{}:{}:{}'.format(
                coords[1:3],
                coords[3:5],
                coords[5:10]
예제 #25
0
def ls(subject_id):
    """Look up one subject by ID and print its details."""
    found = Subject.find(subject_id)
    echo_subject(found)
# Python 2 script: upload station spectrogram PNGs as subjects.
subjects = []
files = glob.glob(BASEDIR+'ZOO/'+station+'/*.png')
if len(files) == 0:
  raise Exception('Error finding PNG files. Did you specify correct station? ('+BASEDIR+'ZOO/'+station+'/*.png)')
# The .zoo sidecar must contain exactly four lines for this unpack.
# NOTE(review): the file handle is never closed.
metadata = open(BASEDIR+station+'.zoo','r')
(fft,overlap,color_min,color_max) = metadata.readlines()

#Create uploaded directory if necessary
dest = BASEDIR+'ZOO/'+station+'/uploaded/'
if not(os.path.isdir(dest)):
    os.mkdir(dest)

for file in files:
    print "Uploading file %s" % file
    sys.stdout.flush()
    subject = Subject()
    subject.links.project = project
    subject.add_location(file)
    # You can set whatever metadata you want, or none at all
    subject.metadata['filename'] = os.path.basename(file)
    #TODO subject.metadata['file_start'] =
    #TODO subject.metadata['sample_rate'] = 5512
    subject.metadata['fft'] = fft
    subject.metadata['overlap'] = overlap
    subject.metadata['color_min'] = color_min
    subject.metadata['color_max'] = color_max
    #TODO subject.metadata['width'] =
    #TODO subject.metadata['height'] =
    subject.save()
    subjects.append(subject)
    os.rename(file,dest+os.path.basename(file)) #move file to uploaded directory
예제 #27
0
videos_uploaded = 0

for original_file in file_list:  # loop throught the file list
    # test if the file is already uploaded, if so skip it
    if original_file not in previous_subjects:
        # get data-time from original video file
        try:
            video_data = FFProbe(location + os.sep + original_file)
            datetime = video_data.metadata['creation_time']
        except (IOError, KeyError, TypeError):
            print('Acquiring exif data for ', original_file, ' failed')
            datetime = ''

        # finally we are ready for the actual upload of the modified file:
        try:
            subject = Subject()
            subject.links.project = project
            compress(location + os.sep + original_file)
            print('Compressed ', original_file, 'to',
                  os.path.getsize('temp.mp4'), 'bytes, uploading....')
            subject.add_location('temp.mp4')
            videos_uploaded += 1
            # update the subject metadata (add '#' to the beginning of the field name to hide that field)
            subject.metadata['Site_Date'] = set_name
            subject.metadata['Filename'] = original_file
            subject.metadata['Date_time'] = datetime
            # nothing is actually uploaded to panoptes until the save is executed.
            # for testing without actually uploading anything comment out the following two lines
            subject.save()
            subject_set.add(subject.id)
        except panoptes_client.panoptes.PanoptesAPIException:
            }

            segments.append(segment)
    print('Item segments transformation complete.')
    return segments

# Build segments from the Library of Congress item, then upload each one
# as a Zooniverse subject in a freshly created subject set.
segments = transform_item_segments('https://www.loc.gov/item/' + LIBRARY_OF_CONGRESS_ITEM_ID)

Panoptes.connect(username=USERNAME, password=PASSWORD, endpoint=ENDPOINT)

project = Project.find(PROJECT)

subject_set = SubjectSet()
subject_set.links.project = project
subject_set.display_name = segments[0]['metadata']['Title'] # uses item Title as default subject set name, or feel free to hardcode
subject_set.save()

print('Begin Zooniverse subject upload...')
for segment in segments:
    # Each segment supplies one media location plus its metadata dict.
    subject = Subject()

    subject.links.project = project
    subject.add_location(segment['location'])

    subject.metadata.update(segment['metadata'])

    subject.save()
    subject_set.add(subject)

print("Zooniverse subject upload complete.")
        'Enter "n" to cancel this upload, any other key to continue' + '\n')
    if retry.lower() == 'n':
        quit()
    # create a new subject set for the new data and link it to the project above
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = set_name
    subject_set.save()

print('Uploading subjects, this could take a while!')
new_subjects = 0
old_subjects = 0
for filename, metadata in subject_metadata.items():
    try:
        # Upload only files not already present in the subject set;
        # count duplicates separately.
        if filename not in previous_subjects:
            subject = Subject()
            subject.links.project = project
            subject.add_location(location + os.sep + filename)
            subject.metadata.update(metadata)
            subject.save()
            subject_set.add(subject.id)
            print(filename)
            new_subjects += 1
        else:
            old_subjects += 1
    except panoptes_client.panoptes.PanoptesAPIException:
        # Best-effort: report and continue with the next file.
        print('An error occurred during the upload of ', filename)
print(new_subjects, 'new subjects created and uploaded', old_subjects,
      'already uploaded')

# Python 2 script: scrape Google-hosted images from parsed HTML, save them
# locally, then upload a fixed batch of 20 to a new subject set.
uploaded = 0
images = [a['src'] for a in soup.find_all("img", {"src": re.compile("gstatic.com")})]
#print images
for img in images:
  raw_img = urllib2.urlopen(img).read()
  #add the directory for your image here
  DIR="images/"
  # Number files sequentially after any existing images of this type.
  cntr = len([i for i in os.listdir(DIR) if image_type in i]) + 1
  f = open(DIR + image_type + "_"+ str(cntr)+".jpg", 'wb')
  f.write(raw_img)
  f.close()

print 'Creating image set...'

# create the subject set.
subject_set = SubjectSet()
subject_set.links.project = p
subject_set.display_name = "Images of " + thing + '\'s'
subject_set.save()

print 'Uploading images to Zooniverse...'

# add all images to subject set
# NOTE(review): hard-coded to exactly images 1..20 regardless of how many
# were actually downloaded above.
for i in range(1,21):
    subject = Subject()
    subject.links.project = p
    subject.add_location('images/' + str(thing) + '_' + str(i)+'.jpg')
    subject.save()
    subject_set.add(subject)

print 'Complete.'
예제 #31
0
# Build per-file metadata: the filename plus its position in the list.
subject_metadata = {}
for f, file in enumerate(files):
    subject_metadata[file] = {'file': file, 'subject_reference': f}

Panoptes.connect(username=username, password=password)
# Project creation kept for reference; project 7699 is reused instead.
# tutorial_project = Project()
tutorial_project = Project.find(7699)
# tutorial_project.display_name = display_name
# tutorial_project.description = description
# tutorial_project.primary_language = 'en'
# tutorial_project.private =True
# tutorial_project.save()

subject_set = SubjectSet()
subject_set.links.project = tutorial_project
subject_set.display_name = subject_name
subject_set.save()

# Refresh the project so the newly created set shows in its links.
tutorial_project.reload()
print(tutorial_project.links.subject_sets)

new_subjects = []
for filename, metadata in tqdm.tqdm(subject_metadata.items()):
    subject = Subject()
    subject.links.project = tutorial_project
    subject.add_location(filename)
    subject.metadata.update(metadata)
    subject.save()
    new_subjects.append(subject)

subject_set.add(new_subjects)
예제 #32
0
def upload_subjects(
    subject_set_id,
    manifest_files,
    allow_missing,
    remote_location,
    mime_type,
    file_column,
):
    """
    Uploads subjects from each of the given MANIFEST_FILES.

    Example with only local files:

    $ panoptes subject-set upload-subjects 4667 manifest.csv

    Local filenames will be automatically detected in the manifest and
    uploaded, or filename columns can be specified with --file-column.

    If you are hosting your media yourself, you can put the URLs in the
    manifest and specify the column number(s):

    $ panoptes subject-set upload-subjects -r 1 4667 manifest.csv

    $ panoptes subject-set upload-subjects -r 1 -r 2 4667 manifest.csv

    Any local files will still be detected and uploaded.
    """
    # A .yaml manifest is a saved upload state from a previously interrupted
    # run; it must be processed on its own.
    if (
        len(manifest_files) > 1
        and any(map(lambda m: m.endswith('.yaml'), manifest_files))
    ):
        click.echo(
            'Error: YAML manifests must be processed one at a time.',
            err=True,
        )
        return -1
    elif manifest_files[0].endswith('.yaml'):
        with open(manifest_files[0], 'r') as yaml_manifest:
            upload_state = yaml.load(yaml_manifest, Loader=yaml.FullLoader)
        # Refuse state files written by a newer CLI (forward compatibility).
        if upload_state['state_version'] > CURRENT_STATE_VERSION:
            click.echo(
                'Error: {} was generated by a newer version of the Panoptes '
                'CLI and is not compatible with this version.'.format(
                    manifest_files[0],
                ),
                err=True,
            )
            return -1
        # If the state targets a different subject set, confirm before
        # redirecting the resumed upload to the one given on the command line.
        if upload_state['subject_set_id'] != subject_set_id:
            click.echo(
                'Warning: You specified subject set {} but this YAML '
                'manifest is for subject set {}.'.format(
                    subject_set_id,
                    upload_state['subject_set_id'],
                ),
                err=True,
            )
            click.confirm(
                'Upload {} to subject set {} ({})?'.format(
                    manifest_files[0],
                    subject_set_id,
                    SubjectSet.find(subject_set_id).display_name,
                ),
                abort=True
            )
            upload_state['subject_set_id'] = subject_set_id
        resumed_upload = True
    else:
        # Fresh upload: record everything needed to resume it later.
        upload_state = {
            'state_version': CURRENT_STATE_VERSION,
            'subject_set_id': subject_set_id,
            'manifest_files': manifest_files,
            'allow_missing': allow_missing,
            'remote_location': remote_location,
            'mime_type': mime_type,
            'file_column': file_column,
            'waiting_to_upload': [],
            'waiting_to_link': {},
        }
        resumed_upload = False

    # A single MIME type may be given for multiple remote locations, in which
    # case it applies to all of them; otherwise the counts must match.
    remote_location_count = len(upload_state['remote_location'])
    mime_type_count = len(upload_state['mime_type'])
    if remote_location_count > 1 and mime_type_count == 1:
        upload_state['mime_type'] = (
            upload_state['mime_type'] * remote_location_count
        )
    elif remote_location_count > 0 and mime_type_count != remote_location_count:
        click.echo(
            'Error: The number of MIME types given must be either 1 or equal '
            'to the number of remote locations.',
            err=True,
        )
        return -1

    def validate_file(file_path):
        # Reject missing, empty, or oversized local files before uploading.
        if not os.path.isfile(file_path):
            click.echo(
                'Error: File "{}" could not be found.'.format(
                    file_path,
                ),
                err=True,
            )
            return False

        file_size = os.path.getsize(file_path)
        if file_size == 0:
            click.echo(
                'Error: File "{}" is empty.'.format(
                    file_path,
                ),
                err=True,
            )
            return False
        elif file_size > MAX_UPLOAD_FILE_SIZE:
            click.echo(
                'Error: File "{}" is {}, larger than the maximum {}.'.format(
                    file_path,
                    humanize.naturalsize(file_size),
                    humanize.naturalsize(MAX_UPLOAD_FILE_SIZE),
                ),
                err=True,
            )
            return False
        return True

    subject_set = SubjectSet.find(upload_state['subject_set_id'])
    if not resumed_upload:
        subject_rows = []
        for manifest_file in upload_state['manifest_files']:
            # Remember how many rows this file contributes so an empty file
            # can be reported even when earlier files already added rows.
            rows_before_this_file = len(subject_rows)
            # newline='' is the csv-module requirement for input files; the
            # previous 'U' (universal newlines) mode was removed in
            # Python 3.11.
            with open(manifest_file, 'r', newline='') as manifest_f:
                file_root = os.path.dirname(manifest_file)
                r = csv.reader(manifest_f, skipinitialspace=True)
                headers = next(r)
                for row in r:
                    metadata = dict(zip(headers, row))
                    files = []
                    if not upload_state['file_column']:
                        # Auto-detect file columns: any field in the first
                        # data row that resolves to an existing local path.
                        upload_state['file_column'] = []
                        for field_number, col in enumerate(row, start=1):
                            file_path = os.path.join(file_root, col)
                            if os.path.exists(file_path):
                                upload_state['file_column'].append(
                                    field_number,
                                )
                                if not validate_file(file_path):
                                    return -1
                                files.append(file_path)
                    else:
                        for field_number in upload_state['file_column']:
                            file_path = os.path.join(
                                file_root,
                                row[field_number - 1]
                            )
                            if not validate_file(file_path):
                                return -1
                            files.append(file_path)

                    # Remote (self-hosted) media: pair each URL column with
                    # its configured MIME type.
                    for field_number, _mime_type in zip(
                        upload_state['remote_location'],
                        upload_state['mime_type'],
                    ):
                        files.append({_mime_type: row[field_number - 1]})

                    if len(files) == 0:
                        click.echo(
                            'Could not find any files in row:',
                            err=True,
                        )
                        click.echo(','.join(row), err=True)
                        if not upload_state['allow_missing']:
                            return -1
                        else:
                            continue
                    subject_rows.append((files, metadata))

                # Fixed: previously checked the cumulative list, so an empty
                # manifest after a non-empty one was silently accepted.
                if len(subject_rows) == rows_before_this_file:
                    click.echo(
                        'File {} did not contain any rows.'.format(
                            manifest_file,
                        ),
                        err=True,
                    )
                    return -1

        subject_rows = list(enumerate(subject_rows))
        upload_state['waiting_to_upload'] = copy.deepcopy(subject_rows)
    else:
        # Re-queue subjects that were created but never linked if they no
        # longer exist server-side. Iterate over a snapshot: deleting from a
        # dict while iterating its items() raises RuntimeError in Python 3.
        for subject_id, subject_row in list(
            upload_state['waiting_to_link'].items()
        ):
            try:
                subject = Subject.find(subject_id)
            except PanoptesAPIException:
                upload_state['waiting_to_upload'].append(subject_row)
                del upload_state['waiting_to_link'][subject_id]
        subject_rows = copy.deepcopy(upload_state['waiting_to_upload'])

    pending_subjects = []

    def move_created(limit):
        # Block until at most `limit` async saves remain pending, moving
        # completed subjects into the waiting-to-link queue.
        while len(pending_subjects) > limit:
            # Iterate over a copy: removing from a list while iterating it
            # skips the element following each removal.
            for subject, subject_row in pending_subjects[:]:
                if subject.async_save_result:
                    pending_subjects.remove((subject, subject_row))
                    upload_state['waiting_to_upload'].remove(subject_row)
                    upload_state['waiting_to_link'][subject.id] = subject_row
            time.sleep(0.5)

    def link_subjects(limit):
        # Batch-link created subjects once more than `limit` are waiting.
        if len(upload_state['waiting_to_link']) > limit:
            subject_set.add(list(upload_state['waiting_to_link'].keys()))
            upload_state['waiting_to_link'].clear()

    with click.progressbar(
        subject_rows,
        length=len(subject_rows),
        label='Uploading subjects',
    ) as _subject_rows:
        try:
            with Subject.async_saves():
                for subject_row in _subject_rows:
                    count, (files, metadata) = subject_row
                    subject = Subject()
                    subject.links.project = subject_set.links.project
                    for media_file in files:
                        subject.add_location(media_file)
                    subject.metadata.update(metadata)
                    subject.save()

                    pending_subjects.append((subject, subject_row))

                    move_created(MAX_PENDING_SUBJECTS)
                    link_subjects(LINK_BATCH_SIZE)

            # Drain everything that is still pending or unlinked.
            move_created(0)
            link_subjects(0)
        finally:
            # On failure, offer to persist the upload state so the run can be
            # resumed later by passing the .yaml file as the manifest.
            if (
                len(pending_subjects) > 0
                or len(upload_state['waiting_to_link']) > 0
            ):
                click.echo('Error: Upload failed.', err=True)
                if click.confirm(
                    'Would you like to save the upload state to resume the '
                    'upload later?',
                    default=True,
                ):
                    while True:
                        state_file_name = 'panoptes-upload-{}.yaml'.format(
                            subject_set_id,
                        )
                        state_file_name = click.prompt(
                            'Enter filename to save to',
                            default=state_file_name,
                        )

                        if not state_file_name.endswith('.yaml'):
                            click.echo(
                                'Error: File name must end in ".yaml".',
                                err=True,
                            )
                            if click.confirm(
                                'Save to {}.yaml?'.format(state_file_name),
                                default=True,
                            ):
                                state_file_name += '.yaml'
                            else:
                                continue
                        if not is_valid_filename(state_file_name):
                            click.echo(
                                'Error: {} is not a valid file name'.format(
                                    state_file_name,
                                ),
                                err=True,
                            )
                            sanitized_filename = sanitize_filename(
                                state_file_name,
                            )
                            if click.confirm(
                                'Save to {}?'.format(
                                    sanitized_filename,
                                ),
                                default=True,
                            ):
                                state_file_name = sanitized_filename
                            else:
                                continue
                        if os.path.exists(state_file_name):
                            if not click.confirm(
                                'File {} already exists. Overwrite?'.format(
                                    state_file_name,
                                ),
                                default=False,
                            ):
                                continue
                        break

                    with open(state_file_name, 'w') as state_file:
                        yaml.dump(upload_state, state_file)
예제 #33
0
    subject_set.links.project = project
    subject_set.display_name = set_name
    subject_set.save()

print('Uploading subjects, This could take a while!')
# Running totals for the upload summary.
new_subjects = 0
old_subjects = 0
failed_subjects = 0
# Identifies the subject currently being processed, for status messages.
working_on = []
#  loop over the preloaded manifest file
for metadata in manifest_list:
    working_on = [metadata['subject'], metadata['image1']]
    #  test for previously uploaded
    if metadata['image1'] not in previous_subjects:
        try:
            subject = Subject()
            subject.links.project = project
            #  find the files in the metadata listing and add their locations
            for file in list(metadata.values())[1:]:
                if file.find('.jpg') > 0:
                    subject.add_location(directory + os.sep + file)
            # update subject metadata
            subject.metadata.update(metadata)
            # nothing happens server-side until the two lines below run;
            # comment them out for a dry-run test
            subject.save()
            subject_set.add(subject.id)
            new_subjects += 1
            build_part = '{} successfully uploaded at {}'.format(working_on, str(datetime.now())[0:19]) + '\n'
        except panoptes_client.panoptes.PanoptesAPIException:
            failed_subjects += 1
            build_part = 'An error occurred during the upload of {}'.format(working_on) + '\n'
예제 #34
0
    def upload_chunks(self,
                      chunks: str,
                      project_id: int,
                      set_name: str,
                      zooniverse_login="",
                      zooniverse_pwd="",
                      amount: int = 1000,
                      ignore_errors: bool = False,
                      **kwargs):
        """Uploads ``amount`` audio chunks from the CSV dataframe `chunks` to a zooniverse project.

        :param chunks: path to the chunk CSV dataframe
        :type chunks: str
        :param project_id: zooniverse project id
        :type project_id: int
        :param set_name: name of the subject set
        :type set_name: str
        :param zooniverse_login: zooniverse login. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_LOGIN`` instead, defaults to ''
        :type zooniverse_login: str, optional
        :param zooniverse_pwd: zooniverse password. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_PWD`` instead, defaults to ''
        :type zooniverse_pwd: str, optional
        :param amount: amount of chunks to upload, defaults to 1000
        :type amount: int, optional
        :param ignore_errors: keep uploading the remaining chunks when saving one subject fails, defaults to False
        :type ignore_errors: bool, optional
        """

        self.chunks_file = chunks
        self.get_credentials(zooniverse_login, zooniverse_pwd)

        metadata_location = os.path.join(self.chunks_file)
        try:
            self.chunks = pd.read_csv(metadata_location, index_col="index")
        except Exception as e:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; chain the original error for debugging.
            raise Exception("cannot read chunk metadata from {}.".format(
                metadata_location)) from e

        assert_dataframe("chunks", self.chunks)
        assert_columns_presence(
            "chunks",
            self.chunks,
            {"recording_filename", "onset", "offset", "uploaded", "mp3"},
        )

        from panoptes_client import Panoptes, Project, Subject, SubjectSet

        Panoptes.connect(username=self.zooniverse_login,
                         password=self.zooniverse_pwd)
        zooniverse_project = Project(project_id)

        subjects_metadata = []

        # Reuse an existing subject set with the requested name, if any;
        # otherwise create a new one attached to the project.
        subject_set = None
        for ss in zooniverse_project.links.subject_sets:
            if ss.display_name == set_name:
                subject_set = ss

        if subject_set is None:
            subject_set = SubjectSet()
            subject_set.links.project = zooniverse_project
            subject_set.display_name = set_name
            subject_set.save()

        subjects = []

        # Select only chunks not yet uploaded, capped at `amount`.
        chunks_to_upload = self.chunks[self.chunks["uploaded"] == False].head(
            amount)
        chunks_to_upload = chunks_to_upload.to_dict(orient="index")

        if len(chunks_to_upload) == 0:
            print("nothing left to upload.")
            return

        for chunk_index in chunks_to_upload:
            chunk = chunks_to_upload[chunk_index]

            print("uploading chunk {} ({},{})".format(
                chunk["recording_filename"], chunk["onset"], chunk["offset"]))

            subject = Subject()
            subject.links.project = zooniverse_project
            subject.add_location(
                os.path.join(os.path.dirname(self.chunks_file), "chunks",
                             chunk["mp3"]))
            subject.metadata["date_extracted"] = chunk["date_extracted"]

            try:
                subject.save()
            except Exception as e:
                print("failed to save chunk {}. an exception has occured:\n{}".
                      format(chunk_index, str(e)))
                print(traceback.format_exc())

                # Fixed: was `args.ignore_errors` -- `args` is not defined in
                # this scope (NameError); the `ignore_errors` parameter is
                # clearly what was intended.
                if ignore_errors:
                    continue
                else:
                    print("subject upload halting here.")
                    break

            subjects.append(subject)

            # Record zooniverse bookkeeping so the chunks CSV can be updated.
            chunk["index"] = chunk_index
            chunk["zooniverse_id"] = str(subject.id)
            chunk["project_id"] = str(project_id)
            chunk["subject_set"] = str(subject_set.display_name)
            chunk["uploaded"] = True
            subjects_metadata.append(chunk)

        if len(subjects) == 0:
            return

        subject_set.add(subjects)

        # Persist the upload status back into the chunk metadata CSV.
        self.chunks.update(pd.DataFrame(subjects_metadata).set_index("index"))

        self.chunks.to_csv(self.chunks_file)
예제 #35
0
 if os.path.isfile('./manga_mpl4_cutouts/cutouts/{0}.jpg'.format(row['MANGAID'].decode('utf-8'))):
     if counter < 75:
         if np.isnan(row['t01_smooth_or_features_a02_features_or_disk_weighted_fraction']):
             pbar = 'NaN'
             pspiral = 'NaN'
             dr8id = 'NaN'
             dr7id = 'NaN'
             specid = 'NaN'
         else:
             pbar = row['t01_smooth_or_features_a02_features_or_disk_weighted_fraction']*row['t02_edgeon_a05_no_weighted_fraction']*row['t03_bar_a06_bar_weighted_fraction']
             pspiral = row['t01_smooth_or_features_a02_features_or_disk_weighted_fraction']*row['t02_edgeon_a05_no_weighted_fraction']*row['t04_spiral_a08_spiral_weighted_fraction']
             dr8id = row['dr8objid']
             dr7id = row['dr7objid']
             specid = row['specobjid']
         summer += 1
         subject = Subject()
         subject.links.project = project
         subject.add_location('./manga_mpl4_cutouts/cutouts/{0}.jpg'.format(row['MANGAID'].decode('utf-8')))
         subject.metadata['RA'] = row['RA']
         subject.metadata['DEC'] = row['DEC']
         subject.metadata['MANGAID'] = row['MANGAID'].decode('utf-8')
         subject.metadata['Z'] = row['Z']
         subject.metadata['PETROTH50'] = row['PETROTH50']
         subject.metadata['#MANGA_TILEID'] = row['MANGA_TILEID']
         subject.metadata['#NSAID'] = row['NSAID']
         subject.metadata['#SERSIC_TH50'] = row['SERSIC_TH50']
         subject.metadata['#P(Bar)'] = pbar
         subject.metadata['#P(Spiral)'] = pspiral
         subject.metadata['#specobjid'] = specid
         subject.metadata['#dr8objid'] = dr8id
         subject.metadata['#dr7objid'] = dr7id