예제 #1
0
    def run(self):
        """
        Migrate segmented railroad rows to their target subject sets.

        Resolves a target subject set for every segmented row, then adds
        each row's subject to that set.
        """
        target_sets_by_subject = self._calculate_target_subject_sets_by_subject()
        row_target_sets = self._segmented_row_target_sets(target_sets_by_subject)

        additions_by_target_set = defaultdict(list)
        # NOTE(review): removals are collected below but never applied in this
        # method -- confirm whether the removal pass happens elsewhere.
        removals_by_target_set = defaultdict(list)

        for subject_id, target_set_id in row_target_sets.items():
            self._logger.debug('Saving segmented row %d to set: %s',
                               subject_id, target_set_id)
            subject = Subject.find(subject_id)
            additions_by_target_set[target_set_id].append(subject)

            # Record every set the subject currently belongs to.
            for current_set in subject.links.subject_sets:
                removals_by_target_set[current_set.id].append(subject_id)

        # Add the new row subjects to their appropriate target sets
        for target_set_id, new_subjects in additions_by_target_set.items():
            self._get_subject_set(target_set_id).add(new_subjects)
예제 #2
0
 def _hydrate_book_and_page(cls, row):
     """
     Populate a subject's book and page metadata from its SubjectModel.

     :param row: mapping with a 'subject_id' key identifying the subject.
     :raises ValueError: if any required book/page metadata field is None.
     """
     subject = Subject.find(row['subject_id'])
     subject_model = SubjectModel(subject)
     subject.metadata['book'] = subject_model['book']
     subject.metadata['page'] = subject_model['page']
     for field in cls.BOOK_AND_PAGE_FIELDS:
         if subject.metadata[field] is None:
             # Bug fix: the format args must be a single tuple. Previously only
             # `field` was applied to a 3-placeholder format string (raising a
             # TypeError), and the remaining values became extra ValueError args.
             raise ValueError("WARN: None '%s' for subject %d and filepath %s" %
                              (field, subject.id, subject.metadata['filepath']))
     subject.save()
예제 #3
0
    def link_new_set(self, subject_set_id):
        """
        Link an existing subject set to a new workflow on this project.

        :param subject_set_id: ID of the subject set to look up and link.
        :return: None
        """
        # Renamed from camelCase `workflowSet`; removed leftover debug prints.
        workflow = Workflow()
        workflow.links.project = self.project

        subject = Subject()
        subject.links.project = self.project
        subject_set = subject.find(subject_set_id)

        # NOTE(review): `links.sub(...)` looks unusual -- confirm this is the
        # intended linking call (vs. e.g. adding to `links.subject_sets`).
        workflow.links.sub(subject_set)
 def queue_new_subject_creation(cls, subject_id, vertex_centroids, target_subject_set_id):
     """
     Fetch the image for the given subject, segment it into columns by the
     provided vertex centroids, then into rows, and push the resulting row
     subjects to the target subject set.

     Static-w/-instance-of-self pattern to support enqueuing in RQ.
     """
     ops = QueueOperations(setup_logger(cls.LOGGER_NAME, 'log/queue_operations.log'))
     subject = Subject.find(subject_id)
     image_path = ops.fetch_subject_image_to_tmp(subject)
     column_paths = ops.perform_column_segmentation(
         subject_id,
         image_path,
         vertex_centroids
     )
     # Upscale any undersized column images before row segmentation.
     for column_path in column_paths:
         ops.upscale_small_images(column_path)
     rows_by_column = ops.perform_row_segmentation(column_paths)
     ops.push_new_row_subjects(subject, target_subject_set_id, rows_by_column)
예제 #5
0
def ls(subject_set_id, quiet, subject_ids):
    """
    Lists subject IDs and their media URLs.
    """

    # Explicit subject IDs take precedence over the subject-set listing.
    if subject_ids:
        for subject in map(Subject.find, subject_ids):
            if quiet:
                click.echo(subject.id)
            else:
                echo_subject(subject)
        return

    set_subjects = Subject.where(subject_set_id=subject_set_id)
    if quiet:
        # Quiet mode: all IDs on one space-separated line.
        click.echo(" ".join(s.id for s in set_subjects))
    else:
        for subject in set_subjects:
            echo_subject(subject)
예제 #6
0
with open(SUBJECT_ID_FILE) as subject_id_f:
    subject_ids = [ s.strip() for s in subject_id_f.readlines() ]

Panoptes.connect(**config)

with ChargingBar(
    'Updating',
    max=len(subject_ids),
    suffix='%(percent).1f%% %(eta_td)s'
) as bar:
    with Subject.async_saves():
        for subject_id in subject_ids:
            bar.next()

            subject = Subject.find(subject_id)

            if '!CERiT' in subject.metadata:
                continue

            superwasp_id = subject.metadata.get('Filename', subject.metadata.get('filename')).split('_')[0]
            coords = superwasp_id.replace('1SWASP', '')
            coords_quoted = urllib.parse.quote(coords)
            ra = urllib.parse.quote('{}:{}:{}'.format(
                coords[1:3],
                coords[3:5],
                coords[5:10]
            ))
            dec = urllib.parse.quote('{}:{}:{}'.format(
                coords[10:13],
                coords[13:15],
예제 #7
0
def run():
    """
    Query for completed subjects, calculate kmeans vertex centroids, fetch subject images, split
    columns by centroids, row segmentation with Ocropy.
    """

    logger = setup_logger(settings.APP_NAME,
                          'log/kmeans_and_enqueue_completed_subjects.log',
                          logging.DEBUG)

    subject_set_csv = SubjectSetCSV()
    workflow_router = SubjectSetWorkflowRouter(subject_set_csv, settings,
                                               logger)
    pages_raw_subject_ids = subject_set_csv.raw_pages_subject_ids()
    logger.debug("Running Wires and Rails Workflow Processor")
    Panoptes.connect(username=settings.PANOPTES_USERNAME,
                     password=settings.PANOPTES_PASSWORD)

    retired_subject_ids = []

    vertices_and_target_subject_sets = []

    for _subject_set_id, metadata in settings.COLUMNS_WORKFLOW_METADATA.items(
    ):

        # Fixed: added the missing closing paren in the log message text.
        logger.debug("Loading vertices / subject retirement info for %(debug_name)s subject set " \
            "(subject set id: %(subject_set_id)d; workflow id: %(workflow_id)d; task id: " \
            " %(task_id)s)", metadata)

        classification_kwargs = {
            'scope': 'project',
            'project_id': settings.PROJECT_ID,
            'workflow_id': metadata['workflow_id']
        }
        logger.debug("Loading classifications by params %s",
                     str(classification_kwargs))
        # list(...) instead of a pass-through comprehension.
        classifications_records = list(
            Classification.where(**classification_kwargs))

        classifications = VertexClassifications(classifications_records,
                                                pages_raw_subject_ids)

        # Aggregate vertex centroids
        centroids_by_subject = classifications.vertex_centroids(
            metadata['task_id'])
        for subject_id, centroids in centroids_by_subject.items():
            # Find target subject set ID, or log and skip the subject
            try:
                target_subject_set_id = workflow_router \
                    .target_subject_set_id(subject_id, classifications_records)
            except UnidentifiedRawSubjectSetException as ex:
                logger.error(ex.args[0])
                continue
            except SharedMajorityException as ex:
                # TODO need add'l monitoring for this, e.g. manual report exception
                logger.error(ex.args[0])
                continue
            vertices_and_target_subject_sets.append(
                [subject_id, centroids, target_subject_set_id])

        # Aggregate retired subjects
        workflow = Workflow.find(metadata['workflow_id'])
        retirement_count = workflow.retirement['options']['count']
        retired_subject_ids += classifications.retired_subject_ids(
            metadata['task_id'], retirement_count)

    logger.debug(
        'Retrieved the following subject centroids for image segmentation: %s',
        str(vertices_and_target_subject_sets))

    logger.debug('For the following retired subject IDs: %s',
                 str(retired_subject_ids))

    queue = Queue(connection=Redis(host=settings.REDIS_HOST))

    # Set for O(1) membership checks instead of scanning the list per subject.
    retired_ids = set(retired_subject_ids)

    for subject_id, centroids, target_subject_set_id in vertices_and_target_subject_sets:
        # Only enqueue subjects whose classification workflow has retired them.
        if subject_id not in retired_ids:
            continue
        subject = Subject.find(subject_id)
        if settings.METADATA_KEY_ALREADY_PROCESSED in subject.metadata and \
           subject.metadata[settings.METADATA_KEY_ALREADY_PROCESSED]:
            logger.debug('Skipping subject id %d; already processed.',
                         subject_id)
            continue
        logger.debug('Enqueuing subjects id: %d', subject_id)
        queue.enqueue(QueueOperations.queue_new_subject_creation,
                      subject_id,
                      centroids,
                      target_subject_set_id,
                      timeout=2 * 60 * 60)
        QueueOperations.flag_subject_as_queued(subject)
예제 #8
0
def delete(force, subject_ids):
    """Delete each listed subject, confirming first unless forced."""
    for sid in subject_ids:
        if not force:
            click.confirm('Delete subject {}?'.format(sid), abort=True)
        subject = Subject.find(sid)
        subject.delete()
예제 #9
0
def info(subject_id):
    """Print the subject's raw record as YAML."""
    raw_record = Subject.find(subject_id).raw
    click.echo(yaml.dump(raw_record))
예제 #10
0
def get_subject(subject_id):
    """Look up and return the Subject with the given ID."""
    subject = Subject.find(subject_id)
    return subject
예제 #11
0
def ls(subject_id):
    """Fetch a single subject by ID and echo it."""
    subject = Subject.find(subject_id)
    echo_subject(subject)