Example #1
0
def cli(ctx, endpoint, admin):
    """Load the Panoptes configuration onto ``ctx`` and connect to the API.

    Built-in defaults are overlaid with the contents of
    ``~/.panoptes/config.yml`` (when it can be opened) and then with the
    ``endpoint`` argument. The results are stored on ``ctx`` as
    ``config_dir``, ``config_file`` and ``config``.

    Parameters:
        ctx: context object; attributes are set on it and its
            ``invoked_subcommand`` attribute is read.
        endpoint: endpoint URL that overrides the configured one when truthy.
        admin: passed straight through to ``Panoptes.connect``.
    """
    ctx.config_dir = os.path.expanduser('~/.panoptes/')
    ctx.config_file = os.path.join(ctx.config_dir, 'config.yml')
    ctx.config = {
        'endpoint': 'https://www.zooniverse.org',
        'username': '',
        'password': '',
    }

    try:
        with open(ctx.config_file) as conf_f:
            # An empty YAML document parses to None, which dict.update()
            # rejects with a TypeError; treat it as "no overrides".
            ctx.config.update(yaml.full_load(conf_f) or {})
    except IOError:
        # No readable config file: keep the defaults.
        pass

    if endpoint:
        ctx.config['endpoint'] = endpoint

    # The 'configure' subcommand runs without connecting first.
    if ctx.invoked_subcommand != 'configure':
        Panoptes.connect(
            endpoint=ctx.config['endpoint'],
            username=ctx.config['username'],
            password=ctx.config['password'],
            admin=admin,
        )
Example #2
0
 def __connect(self):
     """
     Log in to Panoptes with this object's credentials and look up its project.

     :return: the result of ``Project.find`` for ``self.project_id``
     """
     credentials = {'username': self.username, 'password': self.password}
     Panoptes.connect(**credentials)
     project = Project.find(self.project_id)
     return project
Example #3
0
def add_to_subject_set(subject_set_id,
                       subject_set_file,
                       username=None,
                       password=None):
    """
    Read subject IDs from a one-column file and optionally upload them.

    Parameters
    ----------
    subject_set_id : str
        ID of the subject set that receives the subjects

    subject_set_file : str
        path of a file holding one subject ID per line

    username, password : str, str
        Zooniverse credentials. When ``username`` is None the file is read
        and nothing is uploaded.

    Returns
    -------
    None
    """
    with open(subject_set_file) as handle:
        # Stored as one nested entry; np.unique flattens it before the upload.
        lines = [handle.read().splitlines()]

    if username is None:
        return

    try:
        from panoptes_client import Panoptes, SubjectSet
    except ImportError:
        print(
            'Install https://github.com/zooniverse/panoptes-python-client')
        sys.exit(1)

    Panoptes.connect(username=username, password=password)
    destination = SubjectSet.find(subject_set_id)
    destination.add(np.unique(lines))
Example #4
0
def find_duplicates():
    """Report subjects of the galaxy-builder project that share an SDSS DR7 id.

    Logs in (prompting for the password), collects every subject of every
    subject set linked to the project, prints summary counts, and prints the
    two subject sets selected by comparing their subject ids with columns 1
    and 2 of the result.

    Returns an array built from one entry per DR7 id that occurs more than
    once: the DR7 id followed by the subject ids that carry it.
    """
    Panoptes.connect(username='******', password=getpass())

    gzb_project = Project.find(slug='tingard/galaxy-builder')

    members_per_set = [
        list(subject_set.subjects)
        for subject_set in gzb_project.links.subject_sets
    ]
    all_subjects = [
        subject for members in members_per_set for subject in members
    ]

    subject_set_ids = [
        [np.int64(subject.id) for subject in members]
        for members in members_per_set
    ]
    subject_ids = [int(subject.id) for subject in all_subjects]
    # Subjects without the metadata key get False, i.e. 0, and are dropped below.
    dr7_ids = [
        np.int64(subject.metadata.get('SDSS dr7 id', False))
        for subject in all_subjects
    ]

    pairings = sorted(zip(subject_ids, dr7_ids), key=lambda pair: pair[0])
    table = pd.DataFrame(pairings, columns=('subject_id', 'dr7objid'))
    has_dr7_id = table['dr7objid'] != 0
    table = table[has_dr7_id].groupby('subject_id').max()
    n_sids = len(table)
    n_dr7ids = len(table.groupby('dr7objid'))
    print('{} unique subject ids'.format(n_sids))
    print('{} unique dr7 object ids'.format(n_dr7ids))
    print('{} duplicate galaxies'.format(n_sids - n_dr7ids))

    groups = np.array([
        np.concatenate(([dr7_id], rows.index.values))
        for dr7_id, rows in table.groupby('dr7objid')
        if len(rows) > 1
    ])

    def set_covered_by(column):
        # First subject set whose ids all appear in the given groups column.
        candidates = gzb_project.links.subject_sets
        covered = [
            np.all(np.isin(member_ids, groups[:, column]))
            for member_ids in subject_set_ids
        ]
        return candidates[np.argmax(covered)]

    # okay, what subject sets are our duplicates?
    s1 = set_covered_by(1)
    s2 = set_covered_by(2)
    print(s1, s2)
    return groups
def make_tutorial_images(imagePaths, ellipseData, projectData):
    """Upload images as new subjects and add them to an existing subject set.

    Parameters:
        imagePaths: iterable of image file paths; the position of each path
            is used as its image id.
        ellipseData: object providing ``get_group(imageId)`` whose result
            supports ``reset_index(drop=True)`` (presumably a pandas
            GroupBy - confirm against the caller).
        projectData: mapping with the keys ``"user_name"``, ``"password"``
            and ``"subject_set"``.

    Returns:
        None. If the subject set cannot be found the API error is printed
        and nothing is uploaded.
    """
    # Connect to Panoptes
    Panoptes.connect(
        username=projectData["user_name"], password=projectData["password"]
    )

    # The target set is the same for every image, so look it up once. This
    # also keeps ``subjectSet`` bound when ``imagePaths`` is empty, which
    # previously ended in a NameError at the final ``add``.
    try:
        subjectSet = SubjectSet.find(projectData["subject_set"])
    except PanoptesAPIException as e:
        print(e)
        return

    newSubjects = []
    for imageId, imagePath in enumerate(imagePaths):
        print(f"Adding {imagePath}...")
        newSubject = Subject()
        newSubject.add_location(imagePath)
        newSubject.links.project = subjectSet.links.project
        newSubject.metadata.update(
            make_metadata(
                ellipseData.get_group(imageId).reset_index(drop=True), imagePath
            )
        )
        newSubject.save()
        newSubjects.append(newSubject)

    # Nothing to link when no image was given.
    if newSubjects:
        subjectSet.add(newSubjects)
Example #6
0
def upload_manifest_to_galaxy_zoo(subject_set_name,
                                  manifest,
                                  galaxy_zoo_id='5733',
                                  n_processes=10):
    """
    Create a new subject set on a Panoptes project and upload a manifest to it

    Args:
        subject_set_name (str): display name of the new subject set; if it contains 'TEST' nothing is uploaded
        manifest (list): containing dicts of form {png_loc: img.png, key_data: {metadata_col: metadata_value}}
        galaxy_zoo_id (str): panoptes project id e.g. '5733' for Galaxy Zoo, '6490' for mobile
        n_processes (int): size of the thread pool used to save subjects in parallel

    Returns:
        list: the ``manifest`` argument itself (handy when debugging)
    """
    if 'TEST' in subject_set_name:
        logging.warning('Testing mode detected - not uploading!')
        return manifest

    if galaxy_zoo_id == '5733':
        destination = 'Uploading to Galaxy Zoo project 5733'
    elif galaxy_zoo_id == '6490':
        destination = 'Uploading to mobile app project 6490'
    else:
        destination = 'Uploading to unknown project {}'.format(galaxy_zoo_id)
    logging.info(destination)

    # Important - don't commit the password!
    credentials = read_data_from_txt(zooniverse_login_loc)
    Panoptes.connect(**credentials)

    project = Project.find(galaxy_zoo_id)

    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = subject_set_name
    subject_set.save()

    progress = tqdm(total=len(manifest), unit=' subjects uploaded')

    # Every worker saves one manifest entry against the same project and
    # reports to the same progress bar.
    uploader = functools.partial(save_subject, project=project, pbar=progress)
    workers = ThreadPool(n_processes)
    uploaded = workers.map(uploader, manifest)
    progress.close()
    workers.close()
    workers.join()

    subject_set.add(uploaded)

    return manifest  # for debugging only
Example #7
0
def get_panoptes_auth_token():
    """Log in with the stored Zooniverse credentials and return a bearer token.

    The credentials are read as JSON from ``api_to_json.ZOONIVERSE_LOGIN_LOC``
    and passed to ``Panoptes.connect`` as keyword arguments.
    """
    # This token is only valid for ~2 hours. Don't use for long-running downloads.
    # Here, we only need a few calls to get the workflow versions
    # Will ask the devs to expose this nicely with the already-built expiry check.
    login_path = api_to_json.ZOONIVERSE_LOGIN_LOC  # beware sneaky shared global state
    with open(login_path, 'r') as login_file:
        credentials = json.load(login_file)
    Panoptes.connect(**credentials)
    client = Panoptes._local.panoptes_client
    return client.get_bearer_token()
def _retrieve_user(user_id):
    """Return the user for ``user_id``, looking it up only on first use.

    Results are kept in the module-level ``users`` mapping; a miss connects
    with the client credentials from the environment and calls ``User.find``.
    """
    if user_id in users:
        return users[user_id]

    api_url = getenv('PANOPTES_URL', 'https://panoptes.zooniverse.org/')
    app_id = getenv('PANOPTES_CLIENT_ID')
    app_secret = getenv('PANOPTES_CLIENT_SECRET')
    Panoptes.connect(endpoint=api_url,
                     client_id=app_id,
                     client_secret=app_secret)

    found = User.find(user_id)
    users[user_id] = found
    return found
def main(production=False):
    # Create a timestamped test subject set on project 5733 and upload every
    # numbered subject found in the ``subjects`` directory next to this file.
    # NOTE(review): the statement below looks mangled by credential redaction
    # (the ``******`` runs). It presumably prompted for a username/password
    # and called Panoptes.connect - confirm against the original script.
    uname = input('Enter your username: '******'https://panoptes-staging.zooniverse.org',
        admin=True
    )
    # ``production`` is not consulted here; the project id is fixed.
    pId = 5733  # if production else 1820
    project = Project.find(pId)
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = 'Test_subject_set_' + str(int(time.time()))
    subject_set.save()

    loc = os.path.abspath(os.path.dirname(__file__))
    subjects = os.listdir(loc + '/subjects')
    # For each file prefix, collect the sorted numeric indices of the files
    # named "<prefix>_<n>.json" or "<prefix>_<n>.png".
    # NOTE(review): the unpacking order (images, differences, model,
    # metadata) does not match the prefix order ('difference', 'image',
    # 'model', 'metadata'), so ``images`` holds the indices of the
    # difference files and vice versa.
    images, differences, model, metadata = [
        sorted((
            int(re.match(r'{}_([0-9]+)\.(?:json|png)$'.format(s), i).group(1))
            for i in subjects
            if re.match(r'{}_([0-9]+)\.(?:json|png)$'.format(s), i)
        ))
        for s in ('difference', 'image', 'model', 'metadata')
    ]
    # This compares the index lists themselves (not just their lengths) and
    # only prints a message; the upload below still runs on a mismatch.
    if not images == differences == model == metadata:
        print(
            'Images, differences, model and metadata '
            + 'must all have same length'
        )

    # TODO: change subject directory structure to be more efficient
    #       (not having 12,000+ files in a folder...)
    for i in images:
        # ``metadata`` is reused here for the per-subject metadata dict; a
        # metadata file that cannot be opened yields an empty dict.
        try:
            with open('{}/subjects/metadata_{}.json'.format(loc, i)) as f:
                metadata = json.load(f)
        except IOError:
            metadata = {}
        # One subject per call: a single list of three file locations and a
        # single metadata dict. The returned value replaces ``subject_set``.
        subject_set = uploadSubjectToSet(
            project, subject_set,
            [[j.format(loc, i) for j in (
                '{}/subjects/image_{}.png',
                '{}/subjects/difference_{}.json',
                '{}/subjects/model_{}.json'
            )]],  # locations
            [metadata],
        )
def create_subject_set(folder_name, set_name='test_subject_set'):
    """Create a subject set on project 5590 from the files in ``folder_name``.

    Every file named ``image_<name>.png`` defines one subject, which must be
    accompanied by ``difference_<name>.json``, ``model_<name>.json`` and
    ``metadata_<name>.json`` in the same folder. The user is prompted for a
    username and password before anything is uploaded.

    Parameters:
        folder_name: directory containing the subject files.
        set_name: display name of the new subject set.

    Raises:
        AssertionError: with the message 'Missing files!' when any expected
            file does not exist.
    """
    # fullmatch with an escaped dot: only names of the exact form
    # image_<name>.png define a subject. The previous pattern also accepted
    # names such as "image_xpng" or "image_1.png.bak".
    subject_names = [
        found.group(1)
        for found in (
            re.fullmatch(r'image_(.*)\.png', f)
            for f in os.listdir(folder_name)
        )
        if found is not None
    ]
    # Per subject: [image, difference, model, metadata] paths, in this order.
    files = [
        [
            join(folder_name, file_name)
            for file_name in (
                'image_{}.png'.format(subject_name),
                'difference_{}.json'.format(subject_name),
                'model_{}.json'.format(subject_name),
                'metadata_{}.json'.format(subject_name),
            )
        ]
        for subject_name in subject_names
    ]
    # Kept as an assert so callers that catch AssertionError keep working.
    assert all(os.path.exists(j) for i in files for j in i), 'Missing files!'
    uname = input('Enter your username: ')
    pwd = getpass.getpass()
    Panoptes.connect(
        username=uname,
        password=pwd,
        admin=True
    )
    pId = 5590
    project = Project.find(pId)
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = set_name
    subject_set.save()
    metadata_list = []
    for fs in files:
        # fs[3] is the metadata file; fall back to empty metadata if it
        # cannot be opened.
        try:
            with open(fs[3]) as metaF:
                metadata = json.load(metaF)
        except IOError:
            metadata = {}
        metadata_list.append(metadata)
    # Only the first three paths of each subject are uploaded as locations.
    subject_set = uploadSubjectToSet(
        project, subject_set,
        [i[:3] for i in files],
        metadata_list,
    )
Example #11
0
def _retrieve_user(user_id):
    """Return the user for ``user_id``, looking it up only on first use.

    Results are kept in the module-level ``users`` mapping. When the lookup
    raises ``PanoptesAPIException`` a ``CantFindUser`` placeholder is cached
    and returned instead.
    """
    if user_id in users:
        return users[user_id]

    api_url = getenv('PANOPTES_URL', 'https://panoptes.zooniverse.org/')
    app_id = getenv('PANOPTES_CLIENT_ID')
    app_secret = getenv('PANOPTES_CLIENT_SECRET')
    Panoptes.connect(endpoint=api_url,
                     client_id=app_id,
                     client_secret=app_secret)
    try:
        found = User.find(user_id)
    except PanoptesAPIException:
        # some users are not found in panoptes
        # return an empty class with an `id` attribute
        found = CantFindUser(user_id)

    users[user_id] = found
    return found
def panoptes_connect():
    """Connect to Panoptes with the credentials stored in a local text file.

    Returns whatever ``Panoptes.connect`` returns.
    """
    # file with username and password on the first line with a space in between
    with open('panoptesuserfile.txt') as credentials_file:
        first_line = credentials_file.readline()

    fields = first_line.strip().split()
    return Panoptes.connect(username=fields[0], password=fields[1])
Example #13
0
def upload_images(id, use_database=True):
    """Create a subject set named ``id`` and upload the images of that field.

    The process changes into the directory ``target + id`` and reads the
    first line of every ``*-manifest.txt`` file there as comma-separated
    fields: 0 = subject id, 1-3 = file locations, 4 = source name, 5 = ra,
    6 = dec, 7 = size. One subject is created per manifest, all subjects are
    added to the new set, and the set is linked to workflow 11973.

    Parameters:
        id: name of the field; used as the directory suffix and as the
            display name of the subject set.
        use_database: when true, ``update_status`` is called before and
            after the upload.
    """
    print('Create subject set and upload images for', id)
    if use_database:
        update_status(id, gz_status='Uploading')
    Panoptes.connect(username='******',
                     password=os.environ['PANOPTES_PASSWORD'])
    # NOTE: the working directory is changed here and is not restored.
    os.chdir(target + id)
    project = Project.find(slug='chrismrp/radio-galaxy-zoo-lofar')
    subject_set = SubjectSet()

    subject_set.display_name = id
    subject_set.links.project = project
    subject_set.save()
    print('Made subject set')
    new_subjects = []
    g = glob.glob('*-manifest.txt')
    for i, f in enumerate(g):
        # Close each manifest as soon as its first line has been read
        # instead of leaking one open handle per file.
        with open(f) as manifest_file:
            bits = manifest_file.readlines()[0].split(',')
        metadata = {
            'subject_id': int(bits[0]),
            'ra': float(bits[5]),
            'dec': float(bits[6]),
            '#size': float(bits[7]),
            'source_name': bits[4]
        }
        print('Upload doing', bits[4], '%i/%i' % (i, len(g)))
        subject = Subject()
        subject.links.project = project
        subject.metadata.update(metadata)
        for location in bits[1:4]:
            subject.add_location(location)
        subject.save()
        new_subjects.append(subject)

    subject_set.add(new_subjects)

    workflow = Workflow(11973)
    workflow.links.subject_sets.add(subject_set)
    if use_database:
        update_status(id, gz_status='In progress')
    print('Done!')
Example #14
0
def main(argv=None):
    """Main caller for workflow1to2

    Parses ``argv`` (or the process arguments when None), logs in with the
    given username and password, then culls the workflow-1 classifications
    and the subject ids derived from them.
    """
    parser = argparse.ArgumentParser(
        description="Add subject sets to workflow 2a,b,c from classifications")

    positionals = (
        ('username', 'zooniverse username'),
        ('password', 'password'),
    )
    for name, help_text in positionals:
        parser.add_argument(name, type=str, help=help_text)

    for short_flag, long_flag in (('-o', '--overwrite'), ('-a', '--add')):
        parser.add_argument(short_flag, long_flag, action='store_true')

    args = parser.parse_args(argv)

    # log in.
    Panoptes.connect(username=args.username, password=args.password)

    classifications_filename = 'astronomy-rewind-classifications.csv'
    wf1_filename = 'astronomy-rewind-classifications_wf1.csv'

    cull_wf1(classifications_filename, wf1_filename, overwrite=args.overwrite)
    cull_subject_ids(wf1_filename, overwrite=args.overwrite, add=args.add)
Example #15
0
def main(production=False):
    # Create a timestamped test subject set and upload up to 20 consecutively
    # numbered subjects from the ``subjects`` directory next to this file.
    # NOTE(review): the statement below looks mangled by credential redaction
    # (the ``******`` runs). It presumably prompted for a username/password
    # and called Panoptes.connect - confirm against the original script.
    uname = input('Enter your username: '******'https://panoptes-staging.zooniverse.org',
                     admin=True)
    # Project id depends on the ``production`` flag.
    pId = 5590 if production else 1820
    project = Project.find(pId)
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = 'Test_subject_set_' + str(int(time.time()))
    subject_set.save()

    loc = os.path.abspath(os.path.dirname(__file__))

    subjects = os.listdir(loc + '/subjects')

    # TODO: change subject directory structure to be more efficient
    #       (not having 12,000+ files in a folder...)
    for i in range(20):
        if 'image_{}.png'.format(i) in subjects:
            # A metadata file that cannot be opened yields empty metadata.
            try:
                with open('{}/subjects/metadata_{}.json'.format(loc, i)) as f:
                    metadata = json.load(f)
            except IOError:
                metadata = {}
            # One subject per call: a single list of three file locations
            # and a single metadata dict. The returned value replaces
            # ``subject_set``.
            subject_set = uploadSubjectToSet(
                project,
                subject_set,
                [[
                    j.format(loc, i)
                    for j in ('{}/subjects/image_{}.png',
                              '{}/subjects/difference_{}.json',
                              '{}/subjects/model_{}.json')
                ]],  # locations
                [metadata],
            )
        else:
            # Stop at the first missing image index.
            break
Example #16
0
def cli(ctx, endpoint):
    """Load the Panoptes configuration onto ``ctx`` and connect to the API.

    Built-in defaults are overlaid with the contents of
    ``~/.panoptes/config.yml`` (when it can be opened) and then with the
    ``endpoint`` argument. The results are stored on ``ctx`` as
    ``config_dir``, ``config_file`` and ``config``.

    Parameters:
        ctx: context object on which the attributes are set.
        endpoint: endpoint URL that overrides the configured one when truthy.
    """
    # expanduser() gives the same directory as $HOME when it is set, without
    # raising KeyError when it is not.
    ctx.config_dir = os.path.join(os.path.expanduser('~'), '.panoptes')
    ctx.config_file = os.path.join(ctx.config_dir, 'config.yml')
    ctx.config = {
        'endpoint': 'https://panoptes.zooniverse.org',
        'username': '',
        'password': '',
    }

    try:
        with open(ctx.config_file) as conf_f:
            # safe_load: yaml.load() without a Loader can construct arbitrary
            # objects and is rejected by current PyYAML releases. An empty
            # file parses to None, which is treated as "no overrides".
            ctx.config.update(yaml.safe_load(conf_f) or {})
    except IOError:
        # No readable config file: keep the defaults.
        pass

    if endpoint:
        ctx.config['endpoint'] = endpoint

    Panoptes.connect(
        endpoint=ctx.config['endpoint'],
        username=ctx.config['username'],
        password=ctx.config['password']
    )
    def create_subjects_and_link_to_project(self, proto_subjects, project_id,
                                            workflow_id, subject_set_id):
        """Create subjects in a project and attach their set to a workflow.

        Credentials are read from the ``PANOPTES_USERNAME`` and
        ``PANOPTES_PASSWORD`` environment variables.

        Parameters:
            proto_subjects: iterable of mappings with the keys
                ``'location_lc'``, ``'location_ps'`` and ``'metadata'``.
            project_id: id of the project that owns the new subjects.
            workflow_id: id of the workflow the subject set is added to.
            subject_set_id: id of an existing subject set, or None to create
                a new one named after the current UTC time.

        Returns:
            None. Any exception is logged through ``self.log.exception`` and
            not re-raised.
        """
        try:
            USERNAME = os.getenv('PANOPTES_USERNAME')
            PASSWORD = os.getenv('PANOPTES_PASSWORD')
            Panoptes.connect(username=USERNAME,
                             password=PASSWORD,
                             endpoint=self.ENDPOINT)

            project = Project.find(project_id)
            # ``find`` is called on the class; no throwaway instance needed.
            workflow = Workflow.find(workflow_id)

            # Identity test: only a literal None means "create a new set".
            if subject_set_id is None:
                subject_set = SubjectSet()
                ts = time.gmtime()
                subject_set.display_name = time.strftime(
                    "%m-%d-%Y %H:%M:%S", ts)
                subject_set.links.project = project

                subject_set.save()
            else:
                subject_set = SubjectSet.find(subject_set_id)
            subjects = []
            for proto_subject in proto_subjects:
                subject = Subject()
                subject.links.project = project
                subject.add_location(proto_subject['location_lc'])
                subject.add_location(proto_subject['location_ps'])
                subject.metadata.update(proto_subject['metadata'])
                subject.save()
                subjects.append(subject)

            subject_set.add(subjects)
            workflow.add_subject_sets(subject_set)
        except Exception:
            # Deliberate catch-all boundary: log with traceback, do not raise.
            self.log.exception("Error in create_subjects_and_link_to_project ")
 def get_conn(self):
     """Return ``(client, project)``, connecting to Panoptes on first use.

     The connection settings come from ``self.get_connection(self._conn_id)``.
     Missing login, password or schema raise ``MissingLoginError``,
     ``MissingPasswordError`` or ``MissingSchemaError`` respectively.
     """
     if self._panoptes_client is not None:
         return self._panoptes_client, self._project

     self.log.info(f"{self.__class__.__name__} version {__version__}")
     self.log.debug(
         f"getting connection information from {self._conn_id}")
     settings = self.get_connection(self._conn_id)
     scheme = settings.conn_type or self.DEFAULT_CONN_TYPE
     hostname = settings.host or self.DEFAULT_HOST
     port_number = settings.port or self.DEFAULT_PORT
     schema = settings.schema
     user = settings.login
     secret = settings.password

     if settings.extra:
         # Undecodable extras switch the option off.
         try:
             extras = json.loads(settings.extra)
         except json.decoder.JSONDecodeError:
             auto_disable = False
         else:
             auto_disable = extras.get("auto_disable_ssets", False)
         self._auto_disable_subject_sets = auto_disable

     if not user:
         raise MissingLoginError(self._conn_id)
     if not secret:
         raise MissingPasswordError(self._conn_id)
     if not schema:
         raise MissingSchemaError(self._conn_id)

     project_slug = f"{user}/{schema}"
     endpoint = f"{scheme}://{hostname}:{port_number}"
     self._panoptes_client = Panoptes.connect(username=user,
                                              password=secret,
                                              endpoint=endpoint)
     self._project = Project.find(slug=project_slug)
     self.log.info(
         f"Searching project by slug {project_slug} found: {self._project}"
     )
     return self._panoptes_client, self._project
Example #19
0
def run():
    """
    Query for completed subjects, calculate kmeans vertex centroids, fetch subject images, split
    columns by centroids, row segmentation with Ocropy.

    As visible in this function: for every entry of
    ``settings.COLUMNS_WORKFLOW_METADATA`` the project's classifications are
    loaded, vertex centroids and retired subject ids are collected, and each
    retired subject that is not yet flagged as processed is put on a Redis
    queue and flagged as queued.
    """

    logger = setup_logger(settings.APP_NAME,
                          'log/kmeans_and_enqueue_completed_subjects.log',
                          logging.DEBUG)

    subject_set_csv = SubjectSetCSV()
    workflow_router = SubjectSetWorkflowRouter(subject_set_csv, settings,
                                               logger)
    pages_raw_subject_ids = subject_set_csv.raw_pages_subject_ids()
    logger.debug("Running Wires and Rails Workflow Processor")
    Panoptes.connect(username=settings.PANOPTES_USERNAME,
                     password=settings.PANOPTES_PASSWORD)

    # Accumulated across all workflows in the loop below.
    retired_subject_ids = []

    # Entries are [subject_id, centroids, target_subject_set_id].
    vertices_and_target_subject_sets = []

    for _subject_set_id, metadata in settings.COLUMNS_WORKFLOW_METADATA.items(
    ):

        # ``metadata`` supplies the named fields of this log message.
        logger.debug("Loading vertices / subject retirement info for %(debug_name)s subject set " \
            "(subject set id: %(subject_set_id)d; workflow id: %(workflow_id)d; task id: " \
            " %(task_id)s", metadata)

        classification_kwargs = {
            'scope': 'project',
            'project_id': settings.PROJECT_ID,
            'workflow_id': metadata['workflow_id']
        }
        logger.debug("Loading classifications by params %s",
                     str(classification_kwargs))
        # Materialize the query so the records can be reused below.
        classifications_records = [
            c for c in Classification.where(**classification_kwargs)
        ]

        classifications = VertexClassifications(classifications_records,
                                                pages_raw_subject_ids)

        # Aggregate vertex centroids
        centroids_by_subject = classifications.vertex_centroids(
            metadata['task_id'])
        for subject_id, centroids in centroids_by_subject.items():
            # Find target subject set ID, or log and skip the subject
            try:
                target_subject_set_id = workflow_router \
                    .target_subject_set_id(subject_id, classifications_records)
            except UnidentifiedRawSubjectSetException as ex:
                logger.error(ex.args[0])
                continue
            except SharedMajorityException as ex:
                # TODO need add'l monitoring for this, e.g. manual report exception
                logger.error(ex.args[0])
                continue
            vertices_and_target_subject_sets.append(
                [subject_id, centroids, target_subject_set_id])

        # Aggregate retired subjects
        workflow = Workflow.find(metadata['workflow_id'])
        retirement_count = workflow.retirement['options']['count']
        retired_subject_ids += classifications.retired_subject_ids(
            metadata['task_id'], retirement_count)

    logger.debug(
        'Retrieved the following subject centroids for image segmentation: %s',
        str(vertices_and_target_subject_sets))

    logger.debug('For the following retired subject IDs: %s',
                 str(retired_subject_ids))

    queue = Queue(connection=Redis(host=settings.REDIS_HOST))

    for subject_id, centroids, target_subject_set_id in vertices_and_target_subject_sets:
        # Only retired subjects are enqueued.
        if subject_id not in retired_subject_ids:
            continue
        subject = Subject.find(subject_id)
        # Skip subjects whose metadata already carries a truthy
        # "already processed" flag.
        if settings.METADATA_KEY_ALREADY_PROCESSED in subject.metadata and \
           subject.metadata[settings.METADATA_KEY_ALREADY_PROCESSED]:
            logger.debug('Skipping subject id %d; already processed.',
                         subject_id)
            continue
        logger.debug('Enqueuing subjects id: %d', subject_id)
        # timeout is 7200 - presumably seconds, i.e. two hours; confirm
        # against the queue library.
        queue.enqueue(QueueOperations.queue_new_subject_creation,
                      subject_id,
                      centroids,
                      target_subject_set_id,
                      timeout=2 * 60 * 60)
        QueueOperations.flag_subject_as_queued(subject)
Example #20
0
    size = file_bytes.tell()
    print('Uploading ', original_file, scale, resized_width, size)
    #  ensure the file pointer is returned to the beginning of the file-like object
    file_bytes.seek(0, 0)
    return file_bytes


# Command line: a directory of images plus the name of the subject set.
parser = argparse.ArgumentParser(description='Zooniverse Uploader')
parser.add_argument('image_dir')
parser.add_argument('--subject', '-s', required=True)
args = parser.parse_args()

set_name = args.subject

#  connect to zooniverse - requires the User_name and Password to be set up as environmental variables in your OS
Panoptes.connect(username=os.environ['ZOONIVERSE_USERNAME'], password=os.environ['ZOONIVERSE_PASSWORD'])
#  modify the project slug if used for other than Snapshots at Sea
project = Project.find(slug='tedcheese/snapshots-at-sea')

if not os.path.exists(args.image_dir):
    print('[%s] does not exist.' % args.image_dir)
    sys.exit()

#  load the list of image files found in the directory:
#  The local file name will be uploaded as metadata with the image
file_types = ['jpg', 'jpeg']
subject_metadata = {}
for entry in os.listdir(args.image_dir):
    # The extension is the text after the LAST dot, so names containing
    # extra dots (e.g. "trip.2019.jpg") are recognised as well; names
    # without any dot are never treated as images.
    if '.' in entry and entry.rpartition('.')[2].lower() in file_types:
        subject_metadata[entry] = {'Filename': entry}
print('Found ', len(subject_metadata), ' files to upload in this directory.')
Example #21
0
description = 'Shallow clouds are the nemesis of climate modelers. Help us by detecting cloud organization from satellite images.'
subject_name = 'EUREC4A-ICON-Scenes'

# Read filenames to upload: the .jpeg files under path1 whose name ends in an
# even digit followed by two more characters.
# Earlier selections, kept for reference:
#   sorted(glob.glob(path1+'*.jpeg')), sorted(glob.glob(path2+'*.jpeg')),
#   sorted(glob.glob(path3+'*C02*.jpeg'))
files = sorted(glob.glob(path1 + '*[02468]??.jpeg'))

# Create metadata: one entry per file, numbered in sorted order
subject_metadata = {
    file: {'file': file, 'subject_reference': f}
    for f, file in enumerate(files)
}

Panoptes.connect(username=username, password=password)
# An existing project is used here. Creating a fresh one instead would mean
# Project(), then setting display_name, description,
# primary_language = 'en', private = True, and calling save().
tutorial_project = Project.find(7699)

subject_set = SubjectSet()
subject_set.links.project = tutorial_project
subject_set.display_name = subject_name
subject_set.save()

# Reload before printing the project's subject set links.
tutorial_project.reload()
print(tutorial_project.links.subject_sets)
B-1180 Brussels
BELGIUM

phone  : +32 (0)2 373.04.19
e-mail : [email protected]
web    : www.aeronomie.be
________________________________________________
"""

filename = "panoptes_test2_export.csv"

from panoptes_client import Project, Panoptes
from panoptes_client.panoptes import PanoptesAPIException
import requests, sys

# Log in and look up the project whose classifications are exported.
# (Python 2 syntax: print statements.)
Panoptes.connect(username=username, password=password)

project = Project.find(slug='zooniverse/radio-meteor-zoo')

#r = project.get_classifications_export(generate=True, wait=True, wait_timeout=1800)
# Request a new export, then poll for it: up to 60 attempts, each passing
# wait_timeout to wait_classifications_export.
wait_timeout = 60
project.generate_classifications_export()
for attempt in range(60):
    print "wait classification export (attempt %d)" % attempt
    sys.stdout.flush()
    try:
        export = project.wait_classifications_export(wait_timeout)
    except PanoptesAPIException as e:
        print str(e)[:32]
        # Try again while the error text says the export is not ready yet.
        if str(e)[:32] == "classifications_export not ready":
            continue
# This program takes the classifications crowdsourced on Zooniverse and updates the corresponding subject records in MongoDB with them.

#Importing required packages
from panoptes_client import SubjectSet, Subject, Project, Panoptes
from pymongo import MongoClient
import argparse
import datetime
import itertools
import csv
import json
import io
import csh_db_config
import zooniverse_config

# connect to zooniverse
Panoptes.connect(username=zooniverse_config.Zooniverse_USERNAME,
                 password=zooniverse_config.Zooniverse_PASS)
project = Project.find(zooniverse_config.Project_ID)

# connection to mongodb: host and port come from csh_db_config
mongoConn = MongoClient(csh_db_config.DB_HOST + ":" +
                        str(csh_db_config.DB_PORT))
cshTransDB = mongoConn[csh_db_config.TRANSCRIPTION_DB_NAME]
# Authenticate before the collections are looked up.
cshTransDB.authenticate(csh_db_config.TRANSCRIPTION_DB_USER,
                        csh_db_config.TRANSCRIPTION_DB_PASS)
cshCollection = cshTransDB[csh_db_config.TRANS_DB_MeetingMinColl]
cshSubjectSets = cshTransDB[csh_db_config.TRANS_DB_SubjectSets]

# Fetch the classifications export and decode its body as UTF-8 text.
# Note that a new Project object is built from the id here; the ``project``
# found above is not used for the export.
classification_export = Project(
    zooniverse_config.Project_ID).get_export('classifications')
classification = classification_export.content.decode('utf-8')
import re
import urllib2
import os
import getpass
import wikipedia


# ask user for login and object they want to classify
thing = raw_input("What would you like to classify? ")
# NOTE(review): the next line looks mangled by credential redaction (the
# ``******`` runs) and cannot run as written; the password prompt and the
# code that followed it are missing from this copy.
user = raw_input("Zooniverse username: "******"password: "******"lxml")
Example #25
0
#!/usr/bin/env python3
"""
Un-flag arbitrary subjects as not processed, useful for debugging workflow processing.
"""

import sys
sys.path.insert(0, "..")

from panoptes_client import Panoptes, Subject

from lib import settings

# Log in with the credentials from lib.settings.
Panoptes.connect(username=settings.PANOPTES_USERNAME,
                 password=settings.PANOPTES_PASSWORD)

# Earlier id lists, kept for reference:
# SUBJECT_IDS = ['5823821', '5823822']
# SUBJECT_IDS = ['14813279', '14813280', '14813281']
# SUBJECT_IDS = ['15327062','15327056','15327068','15327065']

# Telegraph tests -
SUBJECT_IDS = ['15327068', '15327065', '15327062', '15327059', '15327056']

# Reset the "already processed" metadata flag of each listed subject to
# False and save the subject.
for subject_id in SUBJECT_IDS:
    subject = Subject.find(subject_id)
    subject.metadata[settings.METADATA_KEY_ALREADY_PROCESSED] = False
    subject.save()
Example #26
0
import yaml

import os
import urllib


SUBJECT_ID_FILE = 'subjects.txt'


# Connection settings: the parsed YAML is passed to Panoptes.connect as
# keyword arguments.
with open('config.yaml') as config_f:
    config = yaml.full_load(config_f)

# One subject id per line, surrounding whitespace removed.
with open(SUBJECT_ID_FILE) as subject_id_f:
    subject_ids = [line.strip() for line in subject_id_f]

Panoptes.connect(**config)

progress_options = {
    'max': len(subject_ids),
    'suffix': '%(percent).1f%% %(eta_td)s',
}
with ChargingBar('Updating', **progress_options) as bar:
    with Subject.async_saves():
        for subject_id in subject_ids:
            bar.next()

            subject = Subject.find(subject_id)

            # Subjects that already carry the '!CERiT' metadata key are
            # left alone.
            if '!CERiT' in subject.metadata:
                continue
            # DEFINE ZOONIVERSE SUBJECT METADATA, corresponds to METADATA FROM ITEM above, update both accordingly
            segment['metadata'] = {
                'APA Citation': apa_citation,
                'Date': item_date,
                'Library Of Congress Item ID': loc_id,
                'Source Collection': source_collection,
                'Title': item_title
            }

            segments.append(segment)
    print('Item segments transformation complete.')
    return segments

# Build the segment list for the configured Library of Congress item.
segments = transform_item_segments('https://www.loc.gov/item/' + LIBRARY_OF_CONGRESS_ITEM_ID)

Panoptes.connect(username=USERNAME, password=PASSWORD, endpoint=ENDPOINT)

project = Project.find(PROJECT)

# Create a new subject set in the project. The name is taken from the first
# segment, so ``segments`` must not be empty here.
subject_set = SubjectSet()
subject_set.links.project = project
subject_set.display_name = segments[0]['metadata']['Title'] # uses item Title as default subject set name, or feel free to hardcode
subject_set.save()

print('Begin Zooniverse subject upload...')
# One subject per segment, located at the segment's 'location'.
for segment in segments:
    subject = Subject()

    subject.links.project = project
    subject.add_location(segment['location'])
def connect_api_client():
    """Connect to Panoptes with the client credentials from the environment.

    Reads ``PANOPTES_URL`` (falling back to the public endpoint),
    ``PANOPTES_CLIENT_ID`` and ``PANOPTES_CLIENT_SECRET``.
    """
    # connect to the API only once for this function request
    api_url = getenv('PANOPTES_URL', 'https://panoptes.zooniverse.org/')
    app_id = getenv('PANOPTES_CLIENT_ID')
    app_secret = getenv('PANOPTES_CLIENT_SECRET')
    Panoptes.connect(endpoint=api_url,
                     client_id=app_id,
                     client_secret=app_secret)
Example #29
0
""" This version is written in Python 3.66
This script attempts to retrieve the exif data from existing subject image files
and add the datetime to the subject metadata. It requires the project owner credentials
to be set up as OS environmental variables, and an appropriate project slug modified
on line 11.  depending on the camera used to take the original subject image the exif
code may be different than that in the code and may need to be modified"""
import os
from PIL import Image, ExifTags
import panoptes_client
from panoptes_client import SubjectSet, Project, Panoptes
import requests
# Credentials come from the OS environment variables 'User_name' and 'Password'.
Panoptes.connect(username=os.environ['User_name'], password=os.environ['Password'])
# Zooniverse slugs have the form 'owner/project-name'. A backslash here would be
# parsed by Python as the form-feed escape sequence and corrupt the slug.
project = Project.find(slug='pmason/fossiltrainer')

while True:
    set_id = input('Entry subject set id to update:' + '\n')
    try:
        subject_set = SubjectSet.find(set_id)
        count_subjects = 0
        subject_list = []
        for subject in subject_set.subjects:
            count_subjects += 1
            if subject.metadata['DateTime'] == '':
                try:
                    img = Image.open(requests.get(subject.locations[0]['image/jpeg'], stream=True).raw)
                    exif_dict = img._getexif()
                    date_time = exif_dict[306]
                except (IOError, KeyError):
                    print('Acquiring exif data for ', subject.id, ' failed')
                    continue
                subject.metadata['DateTime'] = date_time
Example #30
0
    def upload_chunks(self,
                      chunks: str,
                      project_id: int,
                      set_name: str,
                      zooniverse_login="",
                      zooniverse_pwd="",
                      amount: int = 1000,
                      ignore_errors: bool = False,
                      **kwargs):
        """Uploads ``amount`` audio chunks from the CSV dataframe `chunks` to a zooniverse project.

        Chunks already flagged as uploaded are skipped. After the upload, the
        chunk CSV is rewritten in place with the upload status and the
        zooniverse identifiers of the chunks that were successfully saved.

        :param chunks: path to the chunk CSV dataframe
        :type chunks: str
        :param project_id: zooniverse project id
        :type project_id: int
        :param set_name: name of the subject set
        :type set_name: str
        :param zooniverse_login: zooniverse login. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_LOGIN`` instead, defaults to ''
        :type zooniverse_login: str, optional
        :param zooniverse_pwd: zooniverse password. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_PWD`` instead, defaults to ''
        :type zooniverse_pwd: str, optional
        :param amount: amount of chunks to upload, defaults to 1000
        :type amount: int, optional
        :param ignore_errors: if True, a chunk that fails to upload is skipped and the upload carries on; otherwise the upload stops at the first failure, defaults to False
        :type ignore_errors: bool, optional
        :raises Exception: if the chunk metadata CSV cannot be read
        """

        self.chunks_file = chunks
        self.get_credentials(zooniverse_login, zooniverse_pwd)

        metadata_location = self.chunks_file
        try:
            self.chunks = pd.read_csv(metadata_location, index_col="index")
        except Exception as err:
            raise Exception("cannot read chunk metadata from {}.".format(
                metadata_location)) from err

        assert_dataframe("chunks", self.chunks)
        assert_columns_presence(
            "chunks",
            self.chunks,
            {"recording_filename", "onset", "offset", "uploaded", "mp3"},
        )

        from panoptes_client import Panoptes, Project, Subject, SubjectSet

        Panoptes.connect(username=self.zooniverse_login,
                         password=self.zooniverse_pwd)
        zooniverse_project = Project(project_id)

        # Reuse the subject set named `set_name` if the project already has one
        # (the last match wins), otherwise create it.
        subject_set = None
        for ss in zooniverse_project.links.subject_sets:
            if ss.display_name == set_name:
                subject_set = ss

        if subject_set is None:
            subject_set = SubjectSet()
            subject_set.links.project = zooniverse_project
            subject_set.display_name = set_name
            subject_set.save()

        # Element-wise comparison on the pandas column: `== False` is intended.
        chunks_to_upload = self.chunks[self.chunks["uploaded"] == False].head(
            amount)
        chunks_to_upload = chunks_to_upload.to_dict(orient="index")

        if not chunks_to_upload:
            print("nothing left to upload.")
            return

        subjects = []
        subjects_metadata = []

        for chunk_index, chunk in chunks_to_upload.items():
            print("uploading chunk {} ({},{})".format(
                chunk["recording_filename"], chunk["onset"], chunk["offset"]))

            subject = Subject()
            subject.links.project = zooniverse_project
            # The audio files live in a "chunks" directory next to the CSV.
            subject.add_location(
                os.path.join(os.path.dirname(self.chunks_file), "chunks",
                             chunk["mp3"]))
            # NOTE: "date_extracted" is not among the columns checked above.
            subject.metadata["date_extracted"] = chunk["date_extracted"]

            try:
                subject.save()
            except Exception as e:
                print("failed to save chunk {}. an exception has occured:\n{}".
                      format(chunk_index, str(e)))
                print(traceback.format_exc())

                if ignore_errors:
                    continue
                else:
                    print("subject upload halting here.")
                    break

            subjects.append(subject)

            chunk["index"] = chunk_index
            chunk["zooniverse_id"] = str(subject.id)
            chunk["project_id"] = str(project_id)
            chunk["subject_set"] = str(subject_set.display_name)
            chunk["uploaded"] = True
            subjects_metadata.append(chunk)

        if not subjects:
            return

        # Subjects saved before a halt are still attached and recorded.
        subject_set.add(subjects)

        self.chunks.update(pd.DataFrame(subjects_metadata).set_index("index"))

        self.chunks.to_csv(self.chunks_file)
Example #31
0
        # print('Failure whilst processing "' + img.filename + '": ' + str(e))
        if os.path.splitext(image)[1] == ".csv":
            os.rename(image, out_path+'/'+os.path.basename(os.path.normpath(image)))

        else:
            f = open(logfile, "a")
            t = time.localtime()
            f.write('\nFailure whilst processing "' + image + '": ' + str(e)+ " " + time.strftime("%D:%H:%M:%S",t)+'\n\n')
            f.close()
            # move error files into seperate folder
            os.rename(image, errorfiles + os.path.basename(os.path.normpath(image)))

# delete the tmp file after the images have been resized

try:
    Panoptes.connect(username=zcfg.login['user'], password=zcfg.login['pass'])
    project = Project.find("6307")
except Exception as e:
    # Record the failure and its cause in the log file, then re-raise:
    # `project` is not defined at this point, so the statements below could
    # only fail with an unrelated NameError if execution carried on.
    t = time.localtime()
    with open(logfile, "a") as f:
        f.write('Unable to connect to Zooniverse: ' + str(e) + " " + time.strftime("%D:%H:%M:%S", t) + '\n')
    raise


# Prepare the (not yet saved) subject set that will receive the subjects.
subject_set = SubjectSet()
s = Subject()

subject_set.links.project = project
subject_set.display_name = 'Tutorial subject set 2'
Example #32
0
    def retrieve_classifications(self,
                                 destination: str,
                                 project_id: int,
                                 zooniverse_login: str = "",
                                 zooniverse_pwd: str = "",
                                 chunks: List[str] = [],
                                 **kwargs):
        """Download the classifications of a Zooniverse project into a CSV dataframe.

        Each annotation of each classification becomes one row, labelled with
        the text of the selected answer. When chunk metadata files are given,
        the rows are additionally matched against the chunks metadata.

        :param destination: output CSV dataframe destination
        :type destination: str
        :param project_id: zooniverse project id
        :type project_id: int
        :param zooniverse_login: zooniverse login. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_LOGIN`` instead, defaults to ''
        :type zooniverse_login: str, optional
        :param zooniverse_pwd: zooniverse password. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_PWD`` instead, defaults to ''
        :type zooniverse_pwd: str, optional
        :param chunks: the list of chunk metadata files to match the classifications to. If provided, only the classifications that have a match will be returned.
        :type chunks: List[str], optional
        """
        self.get_credentials(zooniverse_login, zooniverse_pwd)

        from panoptes_client import Panoptes, Project, Classification
        Panoptes.connect(username=self.zooniverse_login,
                         password=self.zooniverse_pwd)
        project = Project(project_id)

        # Lookup table: (workflow, task, answer position) -> answer label.
        label_rows = []
        for workflow in project.links.workflows:
            workflow_id = workflow.id
            for task_id in workflow.tasks:
                task_answers = workflow.tasks[task_id]["answers"]
                for position, answer in enumerate(task_answers):
                    label_rows.append({
                        "workflow_id": str(workflow_id),
                        "task_id": str(task_id),
                        "answer_id": str(position),
                        "answer": answer["label"],
                    })

        answers_translation_table = pd.DataFrame(label_rows)

        # Fetch the raw payload of every classification of the project.
        raw_records = [
            record.raw
            for record in Classification.where(scope="project",
                                               page_size=1000,
                                               project_id=project_id)
        ]

        classifications = pd.DataFrame(raw_records)

        # Flatten the identifiers nested in the "links" payload.
        links = classifications["links"]
        classifications["user_id"] = links.apply(lambda link: link["user"])
        classifications["subject_id"] = links.apply(
            lambda link: link["subjects"][0]).astype(int)
        classifications["workflow_id"] = links.apply(
            lambda link: link["workflow"])

        # One (task, value) pair per annotation, then one row per pair.
        classifications["tasks"] = classifications["annotations"].apply(
            lambda annotations: [(str(entry["task"]), str(entry["value"]))
                                 for entry in annotations])
        classifications = classifications.explode("tasks")
        classifications["task_id"] = classifications["tasks"].str[0]
        classifications["answer_id"] = classifications["tasks"].str[1]
        classifications = classifications.drop(columns=["tasks"])

        kept_columns = [
            "id", "user_id", "subject_id", "task_id", "answer_id",
            "workflow_id"
        ]
        classifications = classifications[kept_columns]

        join_keys = ["workflow_id", "task_id", "answer_id"]
        classifications = classifications.merge(
            answers_translation_table,
            left_on=join_keys,
            right_on=join_keys,
        )

        if chunks:
            chunks_metadata = pd.concat([pd.read_csv(path) for path in chunks])

            classifications = classifications.merge(chunks_metadata,
                                                    left_on="subject_id",
                                                    right_on="zooniverse_id")

        classifications.set_index("id").to_csv(destination)
def get_classifications(save_dir,
                        max_classifications=None,
                        last_id=None,
                        project_id='5733',
                        max_rate_per_sec=40,
                        per_file=5000) -> int:
    """Download project classifications one by one, saving them as they arrive.

    Saving while downloading avoids memory issues and ensures results are
    kept. Each classification's subject id is replaced by the subject record
    returned by ``get_subject``.

    Args:
        save_dir: location handed to ``AtomicFile``; must be truthy.
        max_classifications (int, optional): Defaults to None. Maximum number
            of classifications to download; None means no limit.
        last_id (optional): Defaults to None. Passed through to
            ``Classification.where`` as ``last_id``.
        project_id (str, optional): Defaults to '5733'. Project to query.
        max_rate_per_sec (int, optional): Defaults to 40. Upper bound on
            classifications processed per second.
        per_file (int, optional): Defaults to 5000. Not used in this function.

    Returns:
        int: highest classification id downloaded (0 if none were downloaded)
    """
    assert save_dir

    with open(ZOONIVERSE_LOGIN_LOC, 'r') as f:
        zooniverse_login = json.load(f)
    Panoptes.connect(**zooniverse_login)

    # TODO specify workflow if possible?
    classifications = Classification.where(scope='project',
                                           project_id=project_id,
                                           last_id=last_id)

    classification_n = 0
    latest_id = 0
    pbar = tqdm(total=max_classifications)

    min_time = timedelta(seconds=(1. / max_rate_per_sec))
    atomic_file = AtomicFile(save_dir)

    try:
        # `None` means "no limit": comparing an int with None raises TypeError.
        while (max_classifications is None
               or classification_n < max_classifications):
            # may possibly be requesting the very first classification twice, not clear how - TODO test
            initial_time = datetime.now()
            try:
                # Only the fetch may signal exhaustion; keeping the `try` this
                # narrow stops a StopIteration raised elsewhere from being
                # mistaken for the end of the results.
                classification = classifications.next().raw  # raw is the actual data
            except StopIteration:  # all retrieved
                logging.info('All classifications retrieved')
                break

            # minor tweaks for convenience
            # replace subject id with subject information from API
            subject_id = classification['links']['subjects'][0]  # only works for single-subject projects
            del classification['links']['subjects']
            subject = get_subject(subject_id)  # assume id is unique, and hence only one match is possible
            classification['links']['subject'] = subject.raw

            atomic_file.add(classification)

            # Throttle to at most `max_rate_per_sec` classifications a second.
            time_elapsed = datetime.now() - initial_time
            if time_elapsed < min_time:
                sleep_seconds = (min_time - time_elapsed).total_seconds()
                logging.debug('Sleeping {} seconds'.format(sleep_seconds))
                time.sleep(sleep_seconds)

            latest_id = max(latest_id, int(classification['id']))

            pbar.update()
            classification_n += 1
    finally:
        # Runs on errors too, so downloaded results are not lost.
        atomic_file.end_file()  # write anything left
        pbar.close()

    return latest_id
Example #34
0
GZ: specobjid, dr8objid, dr7objid, ra, dec, t01_smooth_or_features_a02_features_or_disk_weighted_fraction,
t02_edgeon_a05_no_weighted_fraction, t03_bar_a06_bar_weighted_fraction, t04_spiral_a08_spiral_weighted_fraction

3) Sky match the tables by RA and DEC in topcat, with the MaNGA data as table 1, and GZ as table 2.
4) Remove the second set of RA and DEC columns, and name the first ones 'RA' and 'DEC'
5) Save this file as a csv
6) This only works in python 2 because it uses panoptes_client
'''

import numpy as np
from panoptes_client import SubjectSet, Subject, Project, Panoptes
import os
import progressbar as pb
# Read the Panoptes credentials from the environment (KeyError if either is unset).
myusername = os.environ['PANOPTES_USERNAME']
mypassword = os.environ['PANOPTES_PASSWORD']
Panoptes.connect(username= myusername, password=mypassword)

# Look up the project with id '73'.
project = Project.find(id='73')

# Look up three existing subject sets by their numeric ids.
# NOTE(review): the variable names suggest a full sample plus spiral and bar
# subsets -- confirm against the project.
fullsample = SubjectSet.find(5326)
spirals = SubjectSet.find(5324)
bars = SubjectSet.find(5325)
# Progress bar rendered as a bar followed by an estimated time remaining.
progress = pb.ProgressBar(widgets=[pb.Bar(), pb.ETA()])
data = np.genfromtxt('../GZ3D/MatchedData.csv', delimiter = ',', names=True, 
                      dtype=[('DEC', float), ('IAUNAME', '|S30'),('IFUTARGETSIZE',int),
                             ('MANGAID', '|S10'),('MANGA_TILEID',int),('NSAID', int),
                             ('PETROTH50',float),('RA',float),('SERSIC_TH50',float),
                             ('Z',float),('specobjid', int),('dr8objid', int),
                             ('dr7objid', int),('t01_smooth_or_features_a02_features_or_disk_weighted_fraction', float),
                             ('t02_edgeon_a05_no_weighted_fraction', float),
                             ('t03_bar_a06_bar_weighted_fraction', float),