Ejemplo n.º 1
0
def cli(ctx, endpoint, admin):
    # Builds ctx.config from built-in defaults, then the user's config file
    # (if readable), then the `endpoint` argument (if truthy), and finally
    # connects to Panoptes unless the `configure` subcommand was invoked.
    ctx.config_dir = os.path.expanduser('~/.panoptes/')
    ctx.config_file = os.path.join(ctx.config_dir, 'config.yml')
    ctx.config = {
        'endpoint': 'https://www.zooniverse.org',
        'username': '',
        'password': '',
    }

    try:
        with open(ctx.config_file) as conf_f:
            # An empty config file parses to None; substitute {} so that
            # update() does not raise TypeError.
            ctx.config.update(yaml.full_load(conf_f) or {})
    except IOError:
        # No readable config file: keep the defaults above.
        pass

    if endpoint:
        ctx.config['endpoint'] = endpoint

    if ctx.invoked_subcommand != 'configure':
        Panoptes.connect(
            endpoint=ctx.config['endpoint'],
            username=ctx.config['username'],
            password=ctx.config['password'],
            admin=admin,
        )
Ejemplo n.º 2
0
 def __connect(self):
     """
     Log in to Panoptes with the stored credentials and look up the project.

     :return: whatever ``Project.find(self.project_id)`` returns
     """
     credentials = {'username': self.username, 'password': self.password}
     Panoptes.connect(**credentials)
     project = Project.find(self.project_id)
     return project
Ejemplo n.º 3
0
def add_to_subject_set(subject_set_id,
                       subject_set_file,
                       username=None,
                       password=None):
    """
    Read subject IDs from a one-column file and optionally link them to a
    subject set.

    Parameters
    ----------
    subject_set_id : str
        ID of the subject set to add the subjects to.

    subject_set_file : str
        Path to a file with one subject ID per line.

    username, password : str, str
        Panoptes credentials. When ``username`` is None the file is still
        read, but nothing is uploaded.
    """
    with open(subject_set_file) as id_file:
        subject_ids = id_file.read().splitlines()

    # Without credentials there is nothing to upload.
    if username is None:
        return

    try:
        from panoptes_client import Panoptes, SubjectSet
    except ImportError:
        print(
            'Install https://github.com/zooniverse/panoptes-python-client')
        sys.exit(1)

    Panoptes.connect(username=username, password=password)
    target_set = SubjectSet.find(subject_set_id)
    # np.unique de-duplicates (and sorts) the IDs before they are linked.
    target_set.add(np.unique(subject_ids))
    return
Ejemplo n.º 4
0
def find_duplicates():
    """Report subjects of 'tingard/galaxy-builder' that share an SDSS DR7 id.

    Prints the number of unique subject ids, unique DR7 object ids and
    duplicates, then prints the two subject sets selected via the second and
    third columns of ``groups``.

    Returns an array built from one entry per DR7 object id that has more
    than one subject: the DR7 id followed by the subject ids.
    """
    Panoptes.connect(username='******', password=getpass())

    gzb_project = Project.find(slug='tingard/galaxy-builder')

    # Materialise the subjects of every linked subject set.
    subject_sets = [
        list(linked_set.subjects)
        for linked_set in gzb_project.links.subject_sets
    ]

    subjects = []
    for members in subject_sets:
        subjects.extend(members)

    subject_set_ids = [
        [np.int64(member.id) for member in members]
        for members in subject_sets
    ]
    ids = [int(subject.id) for subject in subjects]
    # A missing 'SDSS dr7 id' becomes 0 (np.int64(False)) and is filtered
    # out below.
    dr7objids = [
        np.int64(subject.metadata.get('SDSS dr7 id', False))
        for subject in subjects
    ]

    pairings = sorted(zip(ids, dr7objids), key=lambda pair: pair[0])
    df = pd.DataFrame(pairings, columns=('subject_id', 'dr7objid'))
    has_dr7_id = df['dr7objid'] != 0
    df = df[has_dr7_id].groupby('subject_id').max()
    n_sids = len(df)
    n_dr7ids = len(df.groupby('dr7objid'))
    print('{} unique subject ids'.format(n_sids))
    print('{} unique dr7 object ids'.format(n_dr7ids))
    print('{} duplicate galaxies'.format(n_sids - n_dr7ids))

    duplicate_rows = []
    for dr7objid, members in df.groupby('dr7objid'):
        if len(members) > 1:
            duplicate_rows.append(
                np.concatenate(([dr7objid], members.index.values))
            )
    groups = np.array(duplicate_rows)

    # okay, what subject sets are our duplicates?
    in_first_column = [
        np.all(np.isin(set_ids, groups[:, 1])) for set_ids in subject_set_ids
    ]
    s1 = gzb_project.links.subject_sets[np.argmax(in_first_column)]
    in_second_column = [
        np.all(np.isin(set_ids, groups[:, 2])) for set_ids in subject_set_ids
    ]
    s2 = gzb_project.links.subject_sets[np.argmax(in_second_column)]
    print(s1, s2)
    return groups
Ejemplo n.º 5
0
def token():
    """
    Returns the current oauth token and its expiration date.
    """

    # The token is fetched before the expiry is read, in the same order as
    # the two values are printed.
    bearer_token = Panoptes.client().get_bearer_token()
    click.echo("Token: {}".format(bearer_token))
    expires_at = Panoptes.client().bearer_expires
    click.echo("Expiry time: {}".format(expires_at))
def make_tutorial_images(imagePaths, ellipseData, projectData):
    """Create one subject per image and add them all to a subject set.

    Args:
        imagePaths: iterable of image locations; the position of each path is
            used as the group key into ``ellipseData``.
        ellipseData: object exposing ``get_group(index)``; the group (with its
            index reset) is passed to ``make_metadata`` together with the path.
        projectData: mapping with the keys ``"user_name"``, ``"password"`` and
            ``"subject_set"``.

    Returns:
        None. If the subject set cannot be found, the API error is printed
        and nothing is uploaded.
    """
    # Connect to Panoptes
    Panoptes.connect(
        username=projectData["user_name"], password=projectData["password"]
    )

    # The target subject set does not depend on the image, so look it up once
    # up front. This also keeps `subjectSet` defined when `imagePaths` is
    # empty.
    try:
        subjectSet = SubjectSet.find(projectData["subject_set"])
    except PanoptesAPIException as e:
        print(e)
        return

    newSubjects = []
    for imageId, imagePath in enumerate(imagePaths):
        print(f"Adding {imagePath}...")
        newSubject = Subject()
        newSubject.add_location(imagePath)
        newSubject.links.project = subjectSet.links.project
        newSubject.metadata.update(
            make_metadata(
                ellipseData.get_group(imageId).reset_index(drop=True), imagePath
            )
        )
        newSubject.save()
        newSubjects.append(newSubject)

    # Nothing to link when no image was supplied.
    if newSubjects:
        subjectSet.add(newSubjects)
Ejemplo n.º 7
0
def upload_manifest_to_galaxy_zoo(subject_set_name,
                                  manifest,
                                  galaxy_zoo_id='5733',
                                  n_processes=10):
    """
    Save manifest (set of galaxies with metadata prepared) to Galaxy Zoo

    A new subject set named ``subject_set_name`` is created on the project,
    every manifest entry is saved through ``save_subject`` on a thread pool,
    and the resulting subjects are added to the new set.

    Args:
        subject_set_name (str): name for subject set. If it contains 'TEST',
            nothing is uploaded and the manifest is returned immediately.
        manifest (list): containing dicts of form {png_loc: img.png, key_data: {metadata_col: metadata_value}}
        galaxy_zoo_id (str): panoptes project id e.g. '5733' for Galaxy Zoo, '6490' for mobile
        n_processes (int): number of pool workers with which to upload galaxies in parallel

    Returns:
        list: the ``manifest`` argument, unchanged (for debugging only)
    """
    if 'TEST' in subject_set_name:
        logging.warning('Testing mode detected - not uploading!')
        return manifest

    if galaxy_zoo_id == '5733':
        logging.info('Uploading to Galaxy Zoo project 5733')
    elif galaxy_zoo_id == '6490':
        logging.info('Uploading to mobile app project 6490')
    else:
        logging.info('Uploading to unknown project {}'.format(galaxy_zoo_id))

    # Important - don't commit the password!
    zooniverse_login = read_data_from_txt(zooniverse_login_loc)
    Panoptes.connect(**zooniverse_login)

    galaxy_zoo = Project.find(galaxy_zoo_id)

    subject_set = SubjectSet()

    subject_set.links.project = galaxy_zoo
    subject_set.display_name = subject_set_name
    subject_set.save()

    pbar = tqdm(total=len(manifest), unit=' subjects uploaded')

    save_subject_partial = functools.partial(save_subject,
                                             project=galaxy_zoo,
                                             pbar=pbar)
    pool = ThreadPool(n_processes)
    try:
        new_subjects = pool.map(save_subject_partial, manifest)
    finally:
        # Release the progress bar and the workers even if an upload fails.
        pbar.close()
        pool.close()
        pool.join()

    subject_set.add(new_subjects)

    return manifest  # for debugging only
Ejemplo n.º 8
0
def get_panoptes_auth_token():
    """Log in with the stored Zooniverse credentials and return a bearer token."""
    # This token is only valid for ~2 hours. Don't use for long-running downloads.
    # Here, we only need a few calls to get the workflow versions
    # Will ask the devs to expose this nicely with the already-built expiry check.
    login_path = api_to_json.ZOONIVERSE_LOGIN_LOC  # beware sneaky shared global state
    with open(login_path, 'r') as login_file:
        credentials = json.load(login_file)
    Panoptes.connect(**credentials)
    # Reaches into the client's private thread-local holder for the client.
    client = Panoptes._local.panoptes_client
    return client.get_bearer_token()
Ejemplo n.º 9
0
def get_authenticated_panoptes(bearer_token, bearer_expiry):
    """Build a Panoptes client that is pre-loaded with an existing bearer token.

    ``bearer_expiry`` is a string in ``"%Y-%m-%d %H:%M:%S.%f"`` format; it is
    parsed and stored on the client as ``bearer_expires``.
    """
    client = Panoptes(endpoint=PanoptesUtils.base_url())

    # Mark the client as logged in without performing a login.
    client.bearer_token = bearer_token
    client.logged_in = True
    expires_at = datetime.strptime(bearer_expiry, "%Y-%m-%d %H:%M:%S.%f")
    client.bearer_expires = expires_at

    return client
Ejemplo n.º 10
0
def _retrieve_user(user_id):
    """Return the user for ``user_id``, consulting the ``users`` cache first."""
    # Cache hit: no API call needed.
    if user_id in users:
        return users[user_id]

    Panoptes.connect(
        endpoint=getenv('PANOPTES_URL', 'https://panoptes.zooniverse.org/'),
        client_id=getenv('PANOPTES_CLIENT_ID'),
        client_secret=getenv('PANOPTES_CLIENT_SECRET'),
    )

    fetched = User.find(user_id)
    users[user_id] = fetched
    return fetched
Ejemplo n.º 11
0
def main(production=False):
    # Creates a timestamped test subject set on project 5733 and uploads every
    # numbered subject found in the `subjects` folder next to this file.
    # NOTE(review): the next statement looks garbled/redacted in this copy
    # (the credential prompt and the Panoptes.connect call appear fused) -
    # restore it from the upstream source before running.
    uname = input('Enter your username: '******'https://panoptes-staging.zooniverse.org',
        admin=True
    )
    pId = 5733  # if production else 1820
    project = Project.find(pId)
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = 'Test_subject_set_' + str(int(time.time()))
    subject_set.save()

    loc = os.path.abspath(os.path.dirname(__file__))
    subjects = os.listdir(loc + '/subjects')
    # Collect, per file prefix, the sorted numeric suffixes present on disk.
    # NOTE(review): the prefixes are generated in the order
    # ('difference', 'image', 'model', 'metadata') but unpacked as
    # (images, differences, model, metadata), so `images` actually holds the
    # 'difference' indices - confirm whether this is intended.
    images, differences, model, metadata = [
        sorted((
            int(re.match(r'{}_([0-9]+)\.(?:json|png)$'.format(s), i).group(1))
            for i in subjects
            if re.match(r'{}_([0-9]+)\.(?:json|png)$'.format(s), i)
        ))
        for s in ('difference', 'image', 'model', 'metadata')
    ]
    # NOTE(review): this compares the index lists for equality (not just
    # their lengths) and only prints a warning; the upload below still runs.
    if not images == differences == model == metadata:
        print(
            'Images, differences, model and metadata '
            + 'must all have same length'
        )

    # TODO: change subject directory structure to be more efficient
    #       (not having 12,000+ files in a folder...)
    for i in images:
        # A missing/unreadable metadata file falls back to empty metadata.
        try:
            with open('{}/subjects/metadata_{}.json'.format(loc, i)) as f:
                metadata = json.load(f)
        except IOError:
            metadata = {}
        # One subject per call: a single list of three locations plus its
        # metadata dict.
        subject_set = uploadSubjectToSet(
            project, subject_set,
            [[j.format(loc, i) for j in (
                '{}/subjects/image_{}.png',
                '{}/subjects/difference_{}.json',
                '{}/subjects/model_{}.json'
            )]],  # locations
            [metadata],
        )
Ejemplo n.º 12
0
def create_subject_set(folder_name, set_name='test_subject_set'):
    """Create a subject set on project 5590 from the files in ``folder_name``.

    Every ``image_<name>.png`` in the folder defines a subject; the matching
    ``difference_<name>.json``, ``model_<name>.json`` and
    ``metadata_<name>.json`` files must exist alongside it.

    Args:
        folder_name: directory containing the subject files.
        set_name: display name of the subject set to create.

    Raises:
        AssertionError: if any of the four files of a subject is missing.
    """
    # The dot is escaped and the pattern anchored so that only names ending
    # in ".png" are treated as subject images.
    image_pattern = re.compile(r'image_(.*?)\.png$')
    subject_names = [
        found.group(1)
        for found in map(image_pattern.match, os.listdir(folder_name))
        if found is not None
    ]
    files = [
        [
            join(folder_name, file_name)
            for file_name in (
                'image_{}.png'.format(subject_name),
                'difference_{}.json'.format(subject_name),
                'model_{}.json'.format(subject_name),
                'metadata_{}.json'.format(subject_name),
            )
        ]
        for subject_name in subject_names
    ]
    # Raised explicitly (rather than via `assert`) so the check survives -O;
    # the exception type is unchanged for existing callers.
    if not all(os.path.exists(p) for group in files for p in group):
        raise AssertionError('Missing files!')
    uname = input('Enter your username: ')
    pwd = getpass.getpass()
    Panoptes.connect(
        username=uname,
        password=pwd,
        admin=True
    )
    pId = 5590
    project = Project.find(pId)
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = set_name
    subject_set.save()
    metadata_list = []
    for fs in files:
        # fs[3] is the metadata file; fall back to empty metadata if it
        # cannot be read.
        try:
            with open(fs[3]) as metaF:
                metadata = json.load(metaF)
        except IOError:
            metadata = {}
        metadata_list.append(metadata)
    # Only the first three files (image, difference, model) are uploaded as
    # locations; the metadata travels separately.
    subject_set = uploadSubjectToSet(
        project, subject_set,
        [i[:3] for i in files],
        metadata_list,
    )
Ejemplo n.º 13
0
def info(user_id, email, login):
    """
    Displays information about a user. Defaults to the current user if no ID or
    search criteria are given.
    """

    # More than one truthy criterion is an error.
    criteria_given = [bool(user_id), bool(email), bool(login)]
    if criteria_given.count(True) > 1:
        click.echo(
            'Error: At most only one of user ID, login, or email may be '
            'specified.',
            err=True,
        )
        return -1

    if user_id:
        user = User.find(user_id)
    else:
        # Search by email when given, otherwise by login (falling back to
        # the login of the current client).
        if email:
            field, wanted = 'email', email
        else:
            if not login:
                login = Panoptes.client().username
            field, wanted = 'login', login
        user = next(User.where(**{field: wanted}), None)
        # Only an exact match on the searched field counts as found.
        if getattr(user, field, '') != wanted:
            click.echo('User not found', err=True)
            return -1
    click.echo(yaml.dump(user.raw))
Ejemplo n.º 14
0
def _retrieve_user(user_id):
    """Return the user for ``user_id``, consulting the ``users`` cache first.

    When the API lookup fails, a ``CantFindUser`` placeholder is cached and
    returned instead.
    """
    if user_id in users:
        return users[user_id]

    Panoptes.connect(
        endpoint=getenv('PANOPTES_URL', 'https://panoptes.zooniverse.org/'),
        client_id=getenv('PANOPTES_CLIENT_ID'),
        client_secret=getenv('PANOPTES_CLIENT_SECRET'),
    )
    try:
        found = User.find(user_id)
    except PanoptesAPIException:
        # some users are not found in panoptes
        # return an empty class with an `id` attribute
        found = CantFindUser(user_id)

    users[user_id] = found
    return found
Ejemplo n.º 15
0
def panoptes_connect():
    """Connect to Panoptes using the credentials stored in a local text file.

    Returns whatever ``Panoptes.connect`` returns.
    """
    # file with username and password on the first line with a space in between
    credentials_path = 'panoptesuserfile.txt'

    with open(credentials_path) as credentials_file:
        first_line = credentials_file.readline()

    fields = first_line.strip().split()
    return Panoptes.connect(username=fields[0], password=fields[1])
Ejemplo n.º 16
0
    def get_user_details(self, response):
        """Build the user-details dict from an OAuth token response.

        ``response`` must provide ``access_token``, ``refresh_token`` and
        ``expires_in`` (seconds). The returned dict has the keys
        ``username``, ``email``, ``is_superuser`` and ``projects``.
        """
        client = Panoptes(
            endpoint=PanoptesUtils.base_url(),
            client_id=PanoptesUtils.client_id(),
            client_secret=PanoptesUtils.client_secret())

        # Load the tokens from the response onto the client.
        client.bearer_token = response['access_token']
        client.logged_in = True
        client.refresh_token = response['refresh_token']
        client.bearer_expires = datetime.now() + timedelta(
            seconds=response['expires_in'])

        with client:
            user = client.get('/me')[0]['users'][0]

            # Admins get a fixed marker; everyone else gets the hrefs of the
            # projects on which they are a collaborator.
            if user['admin']:
                ids = ['admin user']
            else:
                collaborations = Project.where(
                    current_user_roles='collaborator')
                ids = [project.href for project in collaborations]

            details = {
                'username': user['login'],
                'email': user['email'],
                'is_superuser': user['admin'],
                'projects': ids
            }
            return details
Ejemplo n.º 17
0
def upload_images(id, use_database=True):
    """Create a subject set named ``id`` and upload one subject per manifest.

    The function works inside the directory ``target + id``: every
    ``*-manifest.txt`` file there contributes one subject, built from the
    comma-separated fields of its first line (field 0: subject id, fields
    1-3: locations, field 4: source name, fields 5-7: ra, dec, size). The
    new subject set is then linked to workflow 11973.

    Args:
        id: name of the field/directory; also used as the subject set's
            display name.
        use_database: when true, ``update_status`` is called before and after
            the upload.
    """
    print('Create subject set and upload images for', id)
    if use_database:
        update_status(id, gz_status='Uploading')
    wd = os.getcwd()
    Panoptes.connect(username='******',
                     password=os.environ['PANOPTES_PASSWORD'])
    os.chdir(target + id)
    try:
        project = Project.find(slug='chrismrp/radio-galaxy-zoo-lofar')
        subject_set = SubjectSet()

        subject_set.display_name = id
        subject_set.links.project = project
        subject_set.save()
        print('Made subject set')
        new_subjects = []
        manifests = glob.glob('*-manifest.txt')
        for i, manifest_name in enumerate(manifests):
            # Close each manifest as soon as its first line has been read.
            with open(manifest_name) as manifest_file:
                bits = manifest_file.readlines()[0].split(',')
            metadata = {
                'subject_id': int(bits[0]),
                'ra': float(bits[5]),
                'dec': float(bits[6]),
                '#size': float(bits[7]),
                'source_name': bits[4]
            }
            print('Upload doing', bits[4], '%i/%i' % (i, len(manifests)))
            subject = Subject()
            subject.links.project = project
            subject.metadata.update(metadata)
            for location in bits[1:4]:
                subject.add_location(location)
            subject.save()
            new_subjects.append(subject)

        subject_set.add(new_subjects)

        workflow = Workflow(11973)
        workflow.links.subject_sets.add(subject_set)
    finally:
        # Restore the caller's working directory (saved in `wd` above), even
        # when the upload fails part-way.
        os.chdir(wd)
    if use_database:
        update_status(id, gz_status='In progress')
    print('Done!')
Ejemplo n.º 18
0
def main(argv=None):
    """Main caller for workflow1to2"""
    parser = argparse.ArgumentParser(
        description="Add subject sets to workflow 2a,b,c from classifications")

    # Positional credentials.
    # NOTE(review): a password given on the command line is visible in the
    # process list and shell history.
    for name, help_text in (('username', 'zooniverse username'),
                            ('password', 'password')):
        parser.add_argument(name, type=str, help=help_text)

    # Optional switches forwarded to the culling helpers below.
    parser.add_argument('-o', '--overwrite', action='store_true')
    parser.add_argument('-a', '--add', action='store_true')

    options = parser.parse_args(argv)
    # log in.
    Panoptes.connect(username=options.username, password=options.password)

    classifications_filename = 'astronomy-rewind-classifications.csv'
    wf1_filename = 'astronomy-rewind-classifications_wf1.csv'

    cull_wf1(classifications_filename, wf1_filename,
             overwrite=options.overwrite)
    cull_subject_ids(wf1_filename, overwrite=options.overwrite,
                     add=options.add)
Ejemplo n.º 19
0
def main(production=False):
    # Creates a timestamped test subject set (project 5590 when `production`
    # is true, otherwise 1820) and uploads consecutive subjects 0..19 found
    # in the `subjects` folder next to this file.
    # NOTE(review): the next statement looks garbled/redacted in this copy
    # (the credential prompt and the Panoptes.connect call appear fused) -
    # restore it from the upstream source before running.
    uname = input('Enter your username: '******'https://panoptes-staging.zooniverse.org',
                     admin=True)
    pId = 5590 if production else 1820
    project = Project.find(pId)
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = 'Test_subject_set_' + str(int(time.time()))
    subject_set.save()

    loc = os.path.abspath(os.path.dirname(__file__))

    subjects = os.listdir(loc + '/subjects')

    # TODO: change subject directory structure to be more efficient
    #       (not having 12,000+ files in a folder...)
    for i in range(20):
        if 'image_{}.png'.format(i) in subjects:
            # A missing/unreadable metadata file falls back to empty metadata.
            try:
                with open('{}/subjects/metadata_{}.json'.format(loc, i)) as f:
                    metadata = json.load(f)
            except IOError:
                metadata = {}
            # One subject per call: a single list of three locations plus its
            # metadata dict.
            subject_set = uploadSubjectToSet(
                project,
                subject_set,
                [[
                    j.format(loc, i)
                    for j in ('{}/subjects/image_{}.png',
                              '{}/subjects/difference_{}.json',
                              '{}/subjects/model_{}.json')
                ]],  # locations
                [metadata],
            )
        else:
            # Stop at the first missing image index.
            break
Ejemplo n.º 20
0
def cli(ctx, endpoint):
    # Builds ctx.config from built-in defaults, then the user's config file
    # (if readable), then the `endpoint` argument (if truthy), and connects
    # to Panoptes with the result.

    # Prefer $HOME as before, but fall back to the platform's notion of the
    # home directory instead of raising KeyError when HOME is not set.
    home_dir = os.environ.get('HOME') or os.path.expanduser('~')
    ctx.config_dir = os.path.join(home_dir, '.panoptes')
    ctx.config_file = os.path.join(ctx.config_dir, 'config.yml')
    ctx.config = {
        'endpoint': 'https://panoptes.zooniverse.org',
        'username': '',
        'password': '',
    }

    try:
        with open(ctx.config_file) as conf_f:
            # safe_load: the config only holds plain scalars, and yaml.load
            # without an explicit Loader is unsafe (and rejected by recent
            # PyYAML). An empty file parses to None, hence the `or {}`.
            ctx.config.update(yaml.safe_load(conf_f) or {})
    except IOError:
        # No readable config file: keep the defaults above.
        pass

    if endpoint:
        ctx.config['endpoint'] = endpoint

    Panoptes.connect(
        endpoint=ctx.config['endpoint'],
        username=ctx.config['username'],
        password=ctx.config['password']
    )
Ejemplo n.º 21
0
    def create_subjects_and_link_to_project(self, proto_subjects, project_id,
                                            workflow_id, subject_set_id):
        """Create subjects from ``proto_subjects`` and attach them to a workflow.

        Credentials are read from the ``PANOPTES_USERNAME`` and
        ``PANOPTES_PASSWORD`` environment variables.

        Args:
            proto_subjects: iterable of dicts with the keys ``location_lc``,
                ``location_ps`` and ``metadata``.
            project_id: id of the project the subjects belong to.
            workflow_id: id of the workflow the subject set is added to.
            subject_set_id: id of an existing subject set, or None to create
                a new one named after the current UTC time.

        Any exception is logged through ``self.log`` and not re-raised.
        """
        try:
            username = os.getenv('PANOPTES_USERNAME')
            password = os.getenv('PANOPTES_PASSWORD')
            Panoptes.connect(username=username,
                             password=password,
                             endpoint=self.ENDPOINT)

            project = Project.find(project_id)
            # Called on the class, like Project.find above, instead of on a
            # throwaway instance.
            workflow = Workflow.find(workflow_id)

            if subject_set_id is None:
                subject_set = SubjectSet()
                ts = time.gmtime()
                subject_set.display_name = time.strftime(
                    "%m-%d-%Y %H:%M:%S", ts)
                subject_set.links.project = project

                subject_set.save()
            else:
                subject_set = SubjectSet.find(subject_set_id)

            subjects = []
            for proto_subject in proto_subjects:
                subject = Subject()
                subject.links.project = project
                subject.add_location(proto_subject['location_lc'])
                subject.add_location(proto_subject['location_ps'])
                subject.metadata.update(proto_subject['metadata'])
                subject.save()
                subjects.append(subject)

            subject_set.add(subjects)
            workflow.add_subject_sets(subject_set)
        except Exception:
            # Deliberate best-effort: record the traceback and carry on.
            self.log.exception("Error in create_subjects_and_link_to_project ")
Ejemplo n.º 22
0
    def get_user_details(self, response):
        """Build the user-details dict from an OAuth token response.

        ``response`` must provide ``access_token``, ``refresh_token`` and
        ``expires_in`` (seconds). The returned dict has the keys
        ``username``, ``email``, ``is_superuser`` and ``projects``.
        """
        with Panoptes() as client:
            # Load the tokens from the response onto the client.
            client.bearer_token = response['access_token']
            client.logged_in = True
            client.refresh_token = response['refresh_token']
            issued_at = datetime.now()
            client.bearer_expires = issued_at + timedelta(
                seconds=response['expires_in'])

            user = client.get('/me')[0]['users'][0]

            # Admins get a fixed marker; everyone else gets the ids of the
            # projects returned by an unfiltered Project.where().
            if user['admin']:
                ids = ['admin user']
            else:
                ids = [project.id for project in Project.where()]

            details = {
                'username': user['login'],
                'email': user['email'],
                'is_superuser': user['admin'],
                'projects': ids,
            }
            return details
Ejemplo n.º 23
0
    def apply(self, filenames):
        """Re-save each file as PNG in place and upload it as a subject.

        The target subject set is resolved from the bundle (when the pipeline
        uses multiple subject sets) or from the pipeline. When
        ``self.include_metadata`` is set and a ``manifest.csv`` exists in the
        ``<dir of first file>_interstitial_products`` directory, each
        subject's metadata is the manifest row whose ``#filename`` equals the
        file's base name.

        Args:
            filenames: non-empty sequence of image paths.
        """
        if self.pipeline.multiple_subject_sets:
            scope = self.bundle
        else:
            scope = self.pipeline

        theia_authenticated_client = Panoptes(
            endpoint=PanoptesUtils.base_url(),
            client_id=PanoptesUtils.client_id(),
            client_secret=PanoptesUtils.client_secret()
        )

        with theia_authenticated_client:
            target_set = self._get_subject_set(scope, self.project.id, scope.name_subject_set())

            using_manifest = False
            metadata_dictionary = {}

            # The manifest location is derived from the first file's directory.
            path_example = filenames[0]
            manifest_file_location = path.join((path.dirname(path_example) + "_interstitial_products"), "manifest.csv")
            if self.include_metadata and path.exists(manifest_file_location):
                using_manifest = True
                with open(manifest_file_location, newline='') as csvfile:
                    reader = csv.DictReader(csvfile)
                    for row in reader:
                        metadata_dictionary[row['#filename']] = row

            for filename in filenames:
                # Close the image handle once it has been re-saved.
                with Image.open(filename) as img:
                    img.save(filename, 'png')

                # basename() honours the platform's path separator, unlike
                # splitting on "/".
                name_only = path.basename(filename)

                metadata = {}
                if using_manifest:
                    metadata = metadata_dictionary[name_only]

                new_subject = self._create_subject(self.project.id, filename, metadata=metadata)
                target_set.add(new_subject)
 def get_conn(self):
     """Return ``(client, project)``, connecting on first use.

     Connection details come from ``self.get_connection(self._conn_id)``;
     the project slug is ``"<login>/<schema>"``.

     Raises MissingLoginError, MissingPasswordError or MissingSchemaError
     when the corresponding connection field is empty.
     """
     # Already connected: reuse the cached client and project.
     if self._panoptes_client is not None:
         return self._panoptes_client, self._project

     self.log.info(f"{self.__class__.__name__} version {__version__}")
     self.log.debug(
         f"getting connection information from {self._conn_id}")
     config = self.get_connection(self._conn_id)
     ctyp = config.conn_type or self.DEFAULT_CONN_TYPE
     host = config.host or self.DEFAULT_HOST
     port = config.port or self.DEFAULT_PORT
     slug = config.schema
     login = config.login
     password = config.password

     # The optional JSON "extra" field may carry the auto-disable flag;
     # unparsable JSON turns the flag off.
     if config.extra:
         try:
             extra = json.loads(config.extra)
         except json.decoder.JSONDecodeError:
             self._auto_disable_subject_sets = False
         else:
             self._auto_disable_subject_sets = extra.get(
                 "auto_disable_ssets", False)

     for value, error in ((login, MissingLoginError),
                          (password, MissingPasswordError),
                          (slug, MissingSchemaError)):
         if not value:
             raise error(self._conn_id)

     project_slug = f"{login}/{slug}"
     endpoint = f"{ctyp}://{host}:{port}"
     self._panoptes_client = Panoptes.connect(username=login,
                                              password=password,
                                              endpoint=endpoint)
     self._project = Project.find(slug=project_slug)
     self.log.info(
         f"Searching project by slug {project_slug} found: {self._project}"
     )
     return self._panoptes_client, self._project
Ejemplo n.º 25
0
GZ: specobjid, dr8objid, dr7objid, ra, dec, t01_smooth_or_features_a02_features_or_disk_weighted_fraction,
t02_edgeon_a05_no_weighted_fraction, t03_bar_a06_bar_weighted_fraction, t04_spiral_a08_spiral_weighted_fraction

3) Sky match the tables by RA and DEC in topcat, with the MaNGA data as table 1, and GZ as table 2.
4) Remove the second set of RA and DEC columns, and name the first ones 'RA' and 'DEC'
5) Save this file as a csv
6) This only works in python 2 because it uses panoptes_client
'''

import numpy as np
from panoptes_client import SubjectSet, Subject, Project, Panoptes
import os
import progressbar as pb
# Credentials come from the environment; a missing variable raises KeyError.
myusername = os.environ['PANOPTES_USERNAME']
mypassword = os.environ['PANOPTES_PASSWORD']
Panoptes.connect(username= myusername, password=mypassword)

# Project and subject sets are referenced by hard-coded ids.
project = Project.find(id='73')

fullsample = SubjectSet.find(5326)
spirals = SubjectSet.find(5324)
bars = SubjectSet.find(5325)
# Progress bar showing a bar and an ETA.
progress = pb.ProgressBar(widgets=[pb.Bar(), pb.ETA()])
data = np.genfromtxt('../GZ3D/MatchedData.csv', delimiter = ',', names=True, 
                      dtype=[('DEC', float), ('IAUNAME', '|S30'),('IFUTARGETSIZE',int),
                             ('MANGAID', '|S10'),('MANGA_TILEID',int),('NSAID', int),
                             ('PETROTH50',float),('RA',float),('SERSIC_TH50',float),
                             ('Z',float),('specobjid', int),('dr8objid', int),
                             ('dr7objid', int),('t01_smooth_or_features_a02_features_or_disk_weighted_fraction', float),
                             ('t02_edgeon_a05_no_weighted_fraction', float),
                             ('t03_bar_a06_bar_weighted_fraction', float),
Ejemplo n.º 26
0
import yaml

import os
import urllib


SUBJECT_ID_FILE = 'subjects.txt'


# The whole config mapping is passed to Panoptes.connect() below as keyword
# arguments.
with open('config.yaml') as config_f:
    config = yaml.load(config_f, Loader=yaml.FullLoader)

# One subject id per line; surrounding whitespace is stripped.
with open(SUBJECT_ID_FILE) as subject_id_f:
    subject_ids = [ s.strip() for s in subject_id_f.readlines() ]

Panoptes.connect(**config)

with ChargingBar(
    'Updating',
    max=len(subject_ids),
    suffix='%(percent).1f%% %(eta_td)s'
) as bar:
    with Subject.async_saves():
        for subject_id in subject_ids:
            # The bar advances once per id, including ids skipped below.
            bar.next()

            subject = Subject.find(subject_id)

            # Skip subjects whose metadata already contains the '!CERiT' key.
            if '!CERiT' in subject.metadata:
                continue
import re
import urllib2
import os
import getpass
import wikipedia


# ask user for login and object they want to classify
# (Python 2 code: uses raw_input)
thing = raw_input("What would you like to classify? ")
# NOTE(review): the next statement looks garbled/redacted in this copy (the
# username prompt, the password prompt and a later "lxml" call appear fused) -
# restore it from the upstream source before running.
user = raw_input("Zooniverse username: "******"password: "******"lxml")
Ejemplo n.º 28
0
#!/usr/bin/env python3
"""
Un-flag arbitrary subjects as not processed, useful for debugging workflow processing.
"""

import sys
sys.path.insert(0, "..")

from panoptes_client import Panoptes, Subject

from lib import settings

# Log in with the credentials from the project settings module.
Panoptes.connect(
    username=settings.PANOPTES_USERNAME,
    password=settings.PANOPTES_PASSWORD,
)

# Earlier id lists, kept for reference:
# SUBJECT_IDS = ['5823821', '5823822']
# SUBJECT_IDS = ['14813279', '14813280', '14813281']
# SUBJECT_IDS = ['15327062','15327056','15327068','15327065']

# Telegraph tests -
SUBJECT_IDS = [
    '15327068',
    '15327065',
    '15327062',
    '15327059',
    '15327056',
]

# Clear the "already processed" flag on each subject and save it.
for subject_id in SUBJECT_IDS:
    subject = Subject.find(subject_id)
    subject.metadata[settings.METADATA_KEY_ALREADY_PROCESSED] = False
    subject.save()
Ejemplo n.º 29
0
""" This version is written in Python 3.66
This script attempts to retrieve the EXIF data from existing subject image files
and add the datetime to the subject metadata. It requires the project owner's credentials
to be set up as OS environment variables, and the project slug in the Project.find call
below to be changed to the appropriate project. Depending on the camera used to take the
original subject image, the EXIF tag code may differ from the one used here and may need to be modified."""
import os
from PIL import Image, ExifTags
import panoptes_client
from panoptes_client import SubjectSet, Project, Panoptes
import requests
# Credentials come from the environment; a missing variable raises KeyError.
Panoptes.connect(username=os.environ['User_name'], password=os.environ['Password'])
# The slug separator is a forward slash; the previous backslash made Python
# read "\f" as a form-feed character inside the slug.
project = Project.find(slug='pmason/fossiltrainer')

while True:
    set_id = input('Entry subject set id to update:' + '\n')
    try:
        subject_set = SubjectSet.find(set_id)
        count_subjects = 0
        subject_list = []
        for subject in subject_set.subjects:
            count_subjects += 1
            if subject.metadata['DateTime'] == '':
                try:
                    img = Image.open(requests.get(subject.locations[0]['image/jpeg'], stream=True).raw)
                    exif_dict = img._getexif()
                    date_time = exif_dict[306]
                except (IOError, KeyError):
                    print('Acquiring exif data for ', subject.id, ' failed')
                    continue
                subject.metadata['DateTime'] = date_time
            # DEFINE ZOONIVERSE SUBJECT METADATA, corresponds to METADATA FROM ITEM above, update both accordingly
            segment['metadata'] = {
                'APA Citation': apa_citation,
                'Date': item_date,
                'Library Of Congress Item ID': loc_id,
                'Source Collection': source_collection,
                'Title': item_title
            }

            segments.append(segment)
    print('Item segments transformation complete.')
    return segments

# Build the segment dicts for the configured Library of Congress item.
segments = transform_item_segments('https://www.loc.gov/item/' + LIBRARY_OF_CONGRESS_ITEM_ID)

Panoptes.connect(username=USERNAME, password=PASSWORD, endpoint=ENDPOINT)

project = Project.find(PROJECT)

# Create the subject set that will receive the uploaded segments.
# NOTE(review): segments[0] raises IndexError when no segments were produced.
subject_set = SubjectSet()
subject_set.links.project = project
subject_set.display_name = segments[0]['metadata']['Title'] # uses item Title as default subject set name, or feel free to hardcode
subject_set.save()

print('Begin Zooniverse subject upload...')
for segment in segments:
    subject = Subject()

    subject.links.project = project
    subject.add_location(segment['location'])
B-1180 Brussels
BELGIUM

phone  : +32 (0)2 373.04.19
e-mail : [email protected]
web    : www.aeronomie.be
________________________________________________
"""

filename = "panoptes_test2_export.csv"

from panoptes_client import Project, Panoptes
from panoptes_client.panoptes import PanoptesAPIException
import requests, sys

# NOTE(review): `username` and `password` are not defined in the visible part
# of this script - presumably set earlier; confirm.
Panoptes.connect(username=username, password=password)

project = Project.find(slug='zooniverse/radio-meteor-zoo')

# Earlier one-call variant, kept for reference:
#r = project.get_classifications_export(generate=True, wait=True, wait_timeout=1800)
# Value passed to each wait_classifications_export() call in the loop below.
wait_timeout = 60
# Request a fresh export; the loop below waits for it.
project.generate_classifications_export()
for attempt in range(60):
    print "wait classification export (attempt %d)" % attempt
    sys.stdout.flush()
    try:
        export = project.wait_classifications_export(wait_timeout)
    except PanoptesAPIException as e:
        print str(e)[:32]
        if str(e)[:32] == "classifications_export not ready":        
            continue
Ejemplo n.º 32
0
def run():
    """
    Query for completed subjects, calculate kmeans vertex centroids, fetch subject images, split
    columns by centroids, row segmentation with Ocropy.

    For every workflow described in ``settings.COLUMNS_WORKFLOW_METADATA`` this
    loads the project's classifications, aggregates vertex centroids per
    subject, resolves the target subject set for each subject and collects the
    IDs of retired subjects. Every subject that is retired and not yet flagged
    as processed is then enqueued on the Redis-backed queue for new-subject
    creation and flagged as queued.

    Subjects whose target subject set cannot be resolved are logged and
    skipped. Returns nothing.
    """

    logger = setup_logger(settings.APP_NAME,
                          'log/kmeans_and_enqueue_completed_subjects.log',
                          logging.DEBUG)

    subject_set_csv = SubjectSetCSV()
    workflow_router = SubjectSetWorkflowRouter(subject_set_csv, settings,
                                               logger)
    pages_raw_subject_ids = subject_set_csv.raw_pages_subject_ids()
    logger.debug("Running Wires and Rails Workflow Processor")
    Panoptes.connect(username=settings.PANOPTES_USERNAME,
                     password=settings.PANOPTES_PASSWORD)

    retired_subject_ids = []

    vertices_and_target_subject_sets = []

    # Only the per-workflow metadata is needed; the mapping keys are unused.
    for metadata in settings.COLUMNS_WORKFLOW_METADATA.values():

        logger.debug(
            "Loading vertices / subject retirement info for %(debug_name)s subject set "
            "(subject set id: %(subject_set_id)d; workflow id: %(workflow_id)d; task id: "
            " %(task_id)s", metadata)

        classification_kwargs = {
            'scope': 'project',
            'project_id': settings.PROJECT_ID,
            'workflow_id': metadata['workflow_id']
        }
        logger.debug("Loading classifications by params %s",
                     str(classification_kwargs))
        # Materialise the query result once; it is reused for every subject below.
        classifications_records = list(
            Classification.where(**classification_kwargs))

        classifications = VertexClassifications(classifications_records,
                                                pages_raw_subject_ids)

        # Aggregate vertex centroids
        centroids_by_subject = classifications.vertex_centroids(
            metadata['task_id'])
        for subject_id, centroids in centroids_by_subject.items():
            # Find target subject set ID, or log and skip the subject
            try:
                target_subject_set_id = workflow_router \
                    .target_subject_set_id(subject_id, classifications_records)
            except (UnidentifiedRawSubjectSetException,
                    SharedMajorityException) as ex:
                # TODO need add'l monitoring for SharedMajorityException,
                # e.g. manual report exception
                logger.error(ex.args[0])
                continue
            vertices_and_target_subject_sets.append(
                [subject_id, centroids, target_subject_set_id])

        # Aggregate retired subjects
        workflow = Workflow.find(metadata['workflow_id'])
        retirement_count = workflow.retirement['options']['count']
        retired_subject_ids.extend(
            classifications.retired_subject_ids(metadata['task_id'],
                                                retirement_count))

    logger.debug(
        'Retrieved the following subject centroids for image segmentation: %s',
        str(vertices_and_target_subject_sets))

    logger.debug('For the following retired subject IDs: %s',
                 str(retired_subject_ids))

    queue = Queue(connection=Redis(host=settings.REDIS_HOST))

    # Build the lookup once so the membership test in the loop is O(1)
    # instead of scanning the whole list for every subject.
    retired_lookup = set(retired_subject_ids)

    for subject_id, centroids, target_subject_set_id in vertices_and_target_subject_sets:
        if subject_id not in retired_lookup:
            continue
        subject = Subject.find(subject_id)
        if settings.METADATA_KEY_ALREADY_PROCESSED in subject.metadata and \
           subject.metadata[settings.METADATA_KEY_ALREADY_PROCESSED]:
            logger.debug('Skipping subject id %d; already processed.',
                         subject_id)
            continue
        logger.debug('Enqueuing subjects id: %d', subject_id)
        queue.enqueue(QueueOperations.queue_new_subject_creation,
                      subject_id,
                      centroids,
                      target_subject_set_id,
                      timeout=2 * 60 * 60)
        # Flag right after enqueuing so the subject is marked as queued.
        QueueOperations.flag_subject_as_queued(subject)
Ejemplo n.º 33
0
    size = file_bytes.tell()
    print('Uploading ', original_file, scale, resized_width, size)
    #  ensure the file pointer is returned to the beginning of the file-like object
    file_bytes.seek(0, 0)
    return file_bytes


# Command line: a positional image directory plus the required subject set name.
parser = argparse.ArgumentParser(description='Zooniverse Uploader')
parser.add_argument('image_dir')
parser.add_argument('--subject', '-s', required=True)
args = parser.parse_args()

set_name = args.subject

#  connect to zooniverse - requires the User_name and Password to be set up as environmental variables in your OS
Panoptes.connect(username=os.environ['ZOONIVERSE_USERNAME'], password=os.environ['ZOONIVERSE_PASSWORD'])
#  modify the project slug if used for other than Snapshots at Sea
project = Project.find(slug='tedcheese/snapshots-at-sea')

if not os.path.exists(args.image_dir):
    print('[%s] does not exist.' % args.image_dir)
    sys.exit()

#  load the list of image files found in the directory:
#  The local file name will be uploaded as metadata with the image
file_types = ['jpg', 'jpeg']
subject_metadata = {}
for entry in os.listdir(args.image_dir):
    # Take the extension after the LAST dot. Splitting on the first dot
    # would skip names such as "my.photo.jpg".
    extension = os.path.splitext(entry)[1].lstrip('.').lower()
    if extension in file_types:
        subject_metadata[entry] = {'Filename': entry}
print('Found ', len(subject_metadata), ' files to upload in this directory.')
Ejemplo n.º 34
0
description = 'Shallow clouds are the nemesis of climate modelers. Help us by detecting cloud organization from satellite images.'
subject_name = 'EUREC4A-ICON-Scenes'

# Select the images to upload. The active pattern keeps the .jpeg files under
# path1 whose name ends in an even digit followed by two more characters.
# Other selections, kept for reference:
# import pdb; pdb.set_trace()
# files = sorted(glob.glob(path1+'*.jpeg'))
# files = files + sorted(glob.glob(path2+'*.jpeg'))
# files = files + sorted(glob.glob(path3+'*C02*.jpeg'))
files = sorted(glob.glob(path1 + '*[02468]??.jpeg'))

# Metadata per image: its path plus its position in the sorted listing.
subject_metadata = {
    image_path: {'file': image_path, 'subject_reference': index}
    for index, image_path in enumerate(files)
}

# Log in and fetch the existing project by its numeric ID.
Panoptes.connect(username=username, password=password)
# To create a brand-new project instead of reusing project 7699:
# tutorial_project = Project()
tutorial_project = Project.find(7699)
# tutorial_project.display_name = display_name
# tutorial_project.description = description
# tutorial_project.primary_language = 'en'
# tutorial_project.private =True
# tutorial_project.save()

# Create and save a subject set linked to the project.
subject_set = SubjectSet()
subject_set.links.project = tutorial_project
subject_set.display_name = subject_name
subject_set.save()

# Refresh the project and show the subject sets it now links to.
tutorial_project.reload()
print(tutorial_project.links.subject_sets)