Esempio n. 1
0
    def add_subject_set(self, display_name, subjects_metadata):
        """Create a subject set, fill it with subjects and add it to every workflow.

        The source of the first metadata entry is validated *before* anything
        is created remotely, so an unsupported source (or an empty metadata
        list) no longer leaves an empty subject set behind on the project.

        :param display_name: display name given to the new subject set.
        :param subjects_metadata: non-empty sequence of dicts; the ``'source'``
            key of the first entry selects how subjects are created.
        :raises IndexError: if ``subjects_metadata`` is empty.
        :raises NotImplementedError: if the source is not
            ``self.EPICOLLECT5_SOURCE``.
        """
        # Fail fast: nothing has been saved yet at this point.
        source = subjects_metadata[0]['source']
        if source != self.EPICOLLECT5_SOURCE:
            raise NotImplementedError(
                f"Unsupported subjects source: {source!r}")

        project = self._project
        subject_set = SubjectSet()
        subject_set.display_name = display_name
        subject_set.links.project = project
        subject_set.save()

        self.log.info(
            f"Creating {len(subjects_metadata)} subjects to Subject Set {display_name}"
        )
        subjects = self._create_subjects_from_epicollect5(
            project, subjects_metadata)
        subject_set.add(subjects)

        # Attach the populated set to every workflow linked to the project.
        for workflow in project.links.workflows:
            workflow.add_subject_sets(subject_set)
            self.log.info(
                f"Added new Subject Set '{display_name}' to workflow '{workflow.display_name}'"
            )
def create_subject_set(docref, name):
    """Create, save and return a subject set named ``"<docref> - <name>"``.

    The set is linked to the module-level ``project`` object.
    """
    print("Attempting to create a subject set via the Zooniverse API")
    new_set = SubjectSet()
    new_set.links.project = project
    title = docref + " - " + name
    new_set.display_name = title
    new_set.save()
    return new_set
Esempio n. 3
0
    def add_new_subject(self, image_list, metadata_list, subject_set_name):
        """
        Create a new subject set and upload one subject per image into it.

        ``image_list`` and ``metadata_list`` are paired by position and must
        be of equal length.  The lengths are checked before anything is
        created, so a mismatch can no longer cause a partial upload.

        :param image_list: list of images to be added (one location each)
        :param metadata_list: list of metadata dicts, one per image
        :param subject_set_name: display name of the subject set to create
        :raises ValueError: if the two lists differ in length
        :return: None
        """
        # Refuse mismatched input up front instead of failing mid-upload.
        if len(image_list) != len(metadata_list):
            raise ValueError("Image list and metadata list do not match")

        # Create the subject set that will receive the subjects
        subject_set = SubjectSet()
        subject_set.links.project = self.project
        subject_set.display_name = subject_set_name
        subject_set.save()

        # Save one subject per (image, metadata) pair
        new_subjects = []
        for image, metadata in zip(image_list, metadata_list):
            subject = Subject()
            subject.links.project = self.project
            subject.add_location(image)
            subject.metadata.update(metadata)
            subject.save()
            new_subjects.append(subject)

        # Link all saved subjects to the set in a single call
        subject_set.add(new_subjects)
def create_subject_set(project, subject_set_name):
    """Create a subject set named ``subject_set_name`` on ``project``.

    The set is saved, then passed to ``project.add_subject_sets`` and
    returned to the caller.
    """
    created = SubjectSet()
    created.links.project = project
    created.display_name = subject_set_name
    created.save()

    project.add_subject_sets(created)
    return created
Esempio n. 5
0
def upload_manifest_to_galaxy_zoo(subject_set_name,
                                  manifest,
                                  galaxy_zoo_id='5733',
                                  n_processes=10):
    """
    Save manifest (set of galaxies with metadata prepared) to Galaxy Zoo

    A new subject set is created on the project, every manifest entry is
    passed to ``save_subject`` (with ``project`` and ``pbar`` keyword
    arguments) from a thread pool, and the resulting subjects are added to
    the set.

    Args:
        subject_set_name (str): name for subject set. If it contains 'TEST',
            nothing is uploaded and the function returns immediately.
        manifest (list): containing dicts of form {png_loc: img.png, key_data: {metadata_col: metadata_value}}
        galaxy_zoo_id (str): panoptes project id e.g. '5733' for Galaxy Zoo, '6490' for mobile
        n_processes (int): number of worker threads with which to upload galaxies in parallel

    Returns:
        list: the ``manifest`` argument, unchanged (for debugging only)
    """
    if 'TEST' in subject_set_name:
        logging.warning('Testing mode detected - not uploading!')
        return manifest

    if galaxy_zoo_id == '5733':
        logging.info('Uploading to Galaxy Zoo project 5733')
    elif galaxy_zoo_id == '6490':
        logging.info('Uploading to mobile app project 6490')
    else:
        logging.info('Uploading to unknown project {}'.format(galaxy_zoo_id))

    # Important - don't commit the password!
    # The loaded value is unpacked as keyword arguments to Panoptes.connect.
    zooniverse_login = read_data_from_txt(zooniverse_login_loc)
    Panoptes.connect(**zooniverse_login)

    galaxy_zoo = Project.find(galaxy_zoo_id)

    subject_set = SubjectSet()
    subject_set.links.project = galaxy_zoo
    subject_set.display_name = subject_set_name
    subject_set.save()

    pbar = tqdm(total=len(manifest), unit=' subjects uploaded')
    save_subject_partial = functools.partial(save_subject,
                                             project=galaxy_zoo,
                                             pbar=pbar)
    pool = ThreadPool(n_processes)
    try:
        new_subjects = pool.map(save_subject_partial, manifest)
    finally:
        # Release the progress bar and the worker threads even when an
        # upload raises, so a failed run does not leak them.
        pbar.close()
        pool.close()
        pool.join()

    subject_set.add(new_subjects)

    return manifest  # for debugging only
Esempio n. 6
0
    def _create_subject_set(self, project_id, subject_set_name):
        """Look up the project by id, then create, save and return a new subject set on it."""
        owner = Project.find(project_id)

        created = SubjectSet()
        created.display_name = subject_set_name
        created.links.project = owner
        created.save()
        return created
def main(production=False):
    """Create a timestamped test subject set and upload the numbered subjects found in ./subjects.

    NOTE(review): the first statement below is garbled in this copy of the
    file -- the credential prompt and what looks like a ``Panoptes.connect``
    call appear to have been redacted with ``******`` -- and it is not valid
    Python as written. It must be restored before this function can run.

    :param production: not read by the visible code; the project id is fixed
        (the conditional that used it is commented out on the ``pId`` line).
    """
    uname = input('Enter your username: '******'https://panoptes-staging.zooniverse.org',
        admin=True
    )
    pId = 5733  # if production else 1820
    project = Project.find(pId)
    # New subject set whose name embeds the current Unix time in seconds.
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = 'Test_subject_set_' + str(int(time.time()))
    subject_set.save()

    # Subject files are expected in a 'subjects' folder next to this script.
    loc = os.path.abspath(os.path.dirname(__file__))
    subjects = os.listdir(loc + '/subjects')
    # For each file prefix, collect the sorted numeric suffixes of matching
    # '<prefix>_<number>.json' / '<prefix>_<number>.png' file names.
    # NOTE(review): the prefixes are iterated as ('difference', 'image', ...)
    # but unpacked into (images, differences, ...), so ``images`` actually
    # holds the 'difference' indices and vice versa -- confirm intent.
    images, differences, model, metadata = [
        sorted((
            int(re.match(r'{}_([0-9]+)\.(?:json|png)$'.format(s), i).group(1))
            for i in subjects
            if re.match(r'{}_([0-9]+)\.(?:json|png)$'.format(s), i)
        ))
        for s in ('difference', 'image', 'model', 'metadata')
    ]
    # This compares the index lists for equality (not just their lengths) and
    # only prints a warning; the upload below proceeds regardless.
    if not images == differences == model == metadata:
        print(
            'Images, differences, model and metadata '
            + 'must all have same length'
        )

    # TODO: change subject directory structure to be more efficient
    #       (not having 12,000+ files in a folder...)
    for i in images:
        # ``metadata`` is rebound here from the index list above to the
        # per-subject metadata dict (empty when the file cannot be opened).
        try:
            with open('{}/subjects/metadata_{}.json'.format(loc, i)) as f:
                metadata = json.load(f)
        except IOError:
            metadata = {}
        # uploadSubjectToSet is defined outside this view; its return value
        # replaces ``subject_set`` for the next iteration.
        subject_set = uploadSubjectToSet(
            project, subject_set,
            [[j.format(loc, i) for j in (
                '{}/subjects/image_{}.png',
                '{}/subjects/difference_{}.json',
                '{}/subjects/model_{}.json'
            )]],  # locations
            [metadata],
        )
def createSubjectSet(subjName, project):
    """Create a subject set called ``subjName`` on ``project``, save it and return it."""
    new_set = SubjectSet()
    # Link to the project and name the set before saving it.
    new_set.links.project = project
    new_set.display_name = subjName
    new_set.save()
    return new_set
def create_subject_set(folder_name, set_name='test_subject_set'):
    """Upload every subject found in ``folder_name`` into a new subject set.

    A subject is identified by a file named exactly ``image_<name>.png``;
    for each one the companion files ``difference_<name>.json``,
    ``model_<name>.json`` and ``metadata_<name>.json`` must also exist.
    The user is prompted for Zooniverse credentials, a subject set called
    ``set_name`` is created on the project, and the image/difference/model
    files plus the parsed metadata are handed to ``uploadSubjectToSet``.

    :param folder_name: directory containing the subject files.
    :param set_name: display name of the subject set to create.
    :raises AssertionError: if any expected file is missing.
    """
    # Full match with an escaped dot: names such as 'image_1.png.bak' or
    # 'image_xpng' are no longer mistaken for subject images.
    image_pattern = re.compile(r'image_(.*?)\.png')
    subject_names = [
        found.group(1)
        for found in map(image_pattern.fullmatch, os.listdir(folder_name))
        if found is not None
    ]
    files = [
        [
            join(folder_name, template.format(subject_name))
            for template in (
                'image_{}.png',
                'difference_{}.json',
                'model_{}.json',
                'metadata_{}.json',
            )
        ]
        for subject_name in subject_names
    ]
    # Raised explicitly (rather than via ``assert``) so the check still runs
    # under ``python -O``; the exception type seen by callers is unchanged.
    if not all(os.path.exists(path) for group in files for path in group):
        raise AssertionError('Missing files!')

    uname = input('Enter your username: ')
    pwd = getpass.getpass()
    Panoptes.connect(
        username=uname,
        password=pwd,
        admin=True
    )
    pId = 5590
    project = Project.find(pId)
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = set_name
    subject_set.save()

    # The fourth path of every group is the metadata JSON; fall back to an
    # empty dict when it cannot be opened.
    metadata_list = []
    for fs in files:
        try:
            with open(fs[3]) as metaF:
                metadata = json.load(metaF)
        except IOError:
            metadata = {}
        metadata_list.append(metadata)

    # Only the first three paths (image, difference, model) are uploaded as
    # locations; the metadata travels separately.
    uploadSubjectToSet(
        project, subject_set,
        [group[:3] for group in files],
        metadata_list,
    )
Esempio n. 10
0
def create(quiet, project_id, display_name):
    """
    Creates a new subject set.

    Prints the subject set ID and name of the new subject set.
    """

    # Build and persist the set linked to the given project id.
    new_set = SubjectSet()
    new_set.links.project = project_id
    new_set.display_name = display_name
    new_set.save()

    # Quiet mode prints only the id; otherwise delegate to echo_subject_set.
    if not quiet:
        echo_subject_set(new_set)
        return
    click.echo(new_set.id)
Esempio n. 11
0
def upload_images(id, use_database=True):
    """Create a subject set named ``id`` and upload the subjects listed in its manifests.

    The function changes into ``target + id``, reads the first line of every
    ``*-manifest.txt`` file there as comma-separated fields (field 0: subject
    id, fields 1-3: locations, field 4: source name, fields 5-7: ra, dec,
    size), saves one subject per manifest, adds them to the new subject set
    and links the set to workflow 11973.  The original working directory is
    restored afterwards, including when an upload fails.

    :param id: name of the field directory under ``target``; also used as the
        subject set display name.
    :param use_database: when true, ``update_status`` is called before and
        after the upload.
    """
    print('Create subject set and upload images for', id)
    if use_database:
        update_status(id, gz_status='Uploading')
    wd = os.getcwd()
    Panoptes.connect(username='******',
                     password=os.environ['PANOPTES_PASSWORD'])
    os.chdir(target + id)
    try:
        project = Project.find(slug='chrismrp/radio-galaxy-zoo-lofar')
        subject_set = SubjectSet()

        subject_set.display_name = id
        subject_set.links.project = project
        subject_set.save()
        print('Made subject set')
        new_subjects = []
        g = glob.glob('*-manifest.txt')
        for i, f in enumerate(g):
            # Only the first line of each manifest is used; close the file
            # promptly instead of leaking the handle.
            with open(f) as manifest_file:
                bits = manifest_file.readlines()[0].split(',')
            metadata = {
                'subject_id': int(bits[0]),
                'ra': float(bits[5]),
                'dec': float(bits[6]),
                '#size': float(bits[7]),
                'source_name': bits[4]
            }
            print('Upload doing', bits[4], '%i/%i' % (i, len(g)))
            subject = Subject()
            subject.links.project = project
            subject.metadata.update(metadata)
            for location in bits[1:4]:
                subject.add_location(location)
            subject.save()
            new_subjects.append(subject)

        subject_set.add(new_subjects)

        workflow = Workflow(11973)
        workflow.links.subject_sets.add(subject_set)
    finally:
        # ``wd`` was saved for this purpose: do not leave the process in the
        # field directory once the upload is over (or has failed).
        os.chdir(wd)
    if use_database:
        update_status(id, gz_status='In progress')
    print('Done!')
Esempio n. 12
0
def main(production=False):
    """Create a timestamped test subject set and upload up to 20 consecutively numbered subjects.

    NOTE(review): the first statement below is garbled in this copy of the
    file -- the credential prompt and what looks like a ``Panoptes.connect``
    call appear to have been redacted with ``******`` -- and it is not valid
    Python as written. It must be restored before this function can run.

    :param production: selects project id 5590 when true, 1820 otherwise.
    """
    uname = input('Enter your username: '******'https://panoptes-staging.zooniverse.org',
                     admin=True)
    pId = 5590 if production else 1820
    project = Project.find(pId)
    # New subject set whose name embeds the current Unix time in seconds.
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = 'Test_subject_set_' + str(int(time.time()))
    subject_set.save()

    # Subject files are expected in a 'subjects' folder next to this script.
    loc = os.path.abspath(os.path.dirname(__file__))

    subjects = os.listdir(loc + '/subjects')

    # TODO: change subject directory structure to be more efficient
    #       (not having 12,000+ files in a folder...)
    # Walk indices 0..19 and stop at the first index with no image file.
    for i in range(20):
        if 'image_{}.png'.format(i) in subjects:
            # Metadata is optional: fall back to an empty dict when the
            # JSON file cannot be opened.
            try:
                with open('{}/subjects/metadata_{}.json'.format(loc, i)) as f:
                    metadata = json.load(f)
            except IOError:
                metadata = {}
            # uploadSubjectToSet is defined outside this view; its return
            # value replaces ``subject_set`` for the next iteration.
            subject_set = uploadSubjectToSet(
                project,
                subject_set,
                [[
                    j.format(loc, i)
                    for j in ('{}/subjects/image_{}.png',
                              '{}/subjects/difference_{}.json',
                              '{}/subjects/model_{}.json')
                ]],  # locations
                [metadata],
            )
        else:
            break
Esempio n. 13
0
    def create_subjects_and_link_to_project(self, proto_subjects, project_id,
                                            workflow_id, subject_set_id):
        """Upload ``proto_subjects`` into a subject set and attach the set to a workflow.

        Credentials are read from the ``PANOPTES_USERNAME`` and
        ``PANOPTES_PASSWORD`` environment variables.  When ``subject_set_id``
        is ``None`` a new subject set named after the current UTC time is
        created on the project; otherwise the existing set is looked up.

        Any exception raised along the way is logged through
        ``self.log.exception`` and swallowed; the method always returns
        ``None``.

        :param proto_subjects: iterable of dicts with ``'location_lc'``,
            ``'location_ps'`` and ``'metadata'`` keys.
        :param project_id: id of the project owning the subjects.
        :param workflow_id: id of the workflow the subject set is added to.
        :param subject_set_id: id of an existing subject set, or ``None`` to
            create a new one.
        """
        try:
            Panoptes.connect(username=os.getenv('PANOPTES_USERNAME'),
                             password=os.getenv('PANOPTES_PASSWORD'),
                             endpoint=self.ENDPOINT)

            project = Project.find(project_id)
            # ``find`` is called on the class, as with ``Project.find``
            # above; no throwaway instance is needed.
            workflow = Workflow.find(workflow_id)

            if subject_set_id is None:
                subject_set = SubjectSet()
                subject_set.display_name = time.strftime(
                    "%m-%d-%Y %H:%M:%S", time.gmtime())
                subject_set.links.project = project
                subject_set.save()
            else:
                subject_set = SubjectSet.find(subject_set_id)

            subjects = []
            for proto_subject in proto_subjects:
                subject = Subject()
                subject.links.project = project
                subject.add_location(proto_subject['location_lc'])
                subject.add_location(proto_subject['location_ps'])
                subject.metadata.update(proto_subject['metadata'])
                subject.save()
                subjects.append(subject)

            subject_set.add(subjects)
            workflow.add_subject_sets(subject_set)
        except Exception:
            self.log.exception("Error in create_subjects_and_link_to_project ")
Esempio n. 14
0
    def upload_chunks(self,
                      destination,
                      project_slug,
                      set_prefix,
                      zooniverse_login,
                      zooniverse_pwd,
                      batches=0,
                      **kwargs):
        """Upload not-yet-uploaded chunk batches listed in ``<destination>/chunks.csv``.

        For every batch that still has chunks without the ``uploaded`` flag,
        a subject set named ``"<set_prefix>_batch_<batch>"`` is created, one
        subject per chunk is saved and added to it, and the CSV file is
        rewritten with the upload details after each batch.

        :param destination: directory holding ``chunks.csv`` and a ``chunks``
            sub-directory with the audio files.
        :param project_slug: slug of the Zooniverse project.
        :param set_prefix: prefix of the subject set display names.
        :param zooniverse_login: Zooniverse user name.
        :param zooniverse_pwd: Zooniverse password.
        :param batches: maximum number of batches to upload in this call;
            0 (the default) means no limit.
        :raises Exception: if the chunk metadata cannot be read (the
            underlying error is chained as ``__cause__``).
        """
        self.destination = destination

        metadata_location = os.path.join(self.destination, 'chunks.csv')
        try:
            self.chunks = pd.read_csv(metadata_location, index_col='index')
        except Exception as exc:
            # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
            # propagate untouched; the root cause is kept via chaining.
            raise Exception(
                "cannot read chunk metadata in {}. Check the --destination parameter, and make sure you have extracted chunks before."
                .format(metadata_location)) from exc

        Panoptes.connect(username=zooniverse_login, password=zooniverse_pwd)
        zooniverse_project = Project.find(slug=project_slug)

        uploaded = 0
        for batch, chunks in self.chunks.groupby('batch'):
            # Skip batches in which every chunk is already uploaded.
            if chunks['uploaded'].all():
                continue

            subjects_metadata = []

            subject_set = SubjectSet()
            subject_set.links.project = zooniverse_project
            subject_set.display_name = "{}_batch_{}".format(set_prefix, batch)
            subject_set.save()
            subjects = []

            for chunk_index, chunk in chunks.to_dict(orient='index').items():
                print("uploading chunk {} ({},{}) in batch {}".format(
                    chunk['recording'], chunk['onset'], chunk['offset'],
                    batch))

                subject = Subject()
                subject.links.project = zooniverse_project
                subject.add_location(
                    os.path.join(self.destination, 'chunks', chunk['mp3']))
                subject.metadata['date_extracted'] = chunk['date_extracted']
                subject.save()
                subjects.append(subject)

                # Record where the chunk went so the CSV can be updated.
                chunk['index'] = chunk_index
                chunk['zooniverse_id'] = subject.id
                chunk['project_slug'] = project_slug
                chunk['subject_set'] = str(subject_set.display_name)
                chunk['uploaded'] = True
                subjects_metadata.append(chunk)

            subject_set.add(subjects)

            self.chunks.update(
                pd.DataFrame(subjects_metadata).set_index('index'))

            # Persist after every batch so progress survives a later failure.
            self.chunks.to_csv(metadata_location)
            uploaded += 1

            if batches > 0 and uploaded >= batches:
                return
Esempio n. 15
0
def main():
    """Bin the images listed in a file into Zooniverse subject sets of at most ``n`` images.

    Command line: ``-f/--filename`` (required) names a text file with one
    image identifier per line; ``-n/--size`` (default 1000) is the maximum
    number of images per subject set and must lie in 1..10000.

    For each group of ``n`` identifiers a subject set is created, every
    identifier is resolved to a file path through the MongoDB collection,
    uploaded as a subject, added to the set, and its MongoDB record is marked
    as sent.  Finally all created subject sets are added to the project's
    first workflow.

    :raises ValueError: if ``n`` is outside 1..10000 (including ``-n 0``,
        which previously crashed with a ``TypeError``).
    """
    ap = argparse.ArgumentParser(
        description=
        'Given a list of images, bins them into subject sets of size n')

    # require file path to read in images
    ap.add_argument('-f',
                    '--filename',
                    required=True,
                    dest='filename',
                    type=str,
                    help='The name of the file from which to read the images')

    # optionally require subject set size; defaults to 1000
    ap.add_argument(
        '-n',
        '--size',
        required=False,
        dest='n',
        type=int,
        default=1000,
        help='The maximum number of images a subject set should contain. \
                          The value should be between 1 and 10000, inclusive')

    # parse args into variables and check values
    args = ap.parse_args()
    filename = args.filename
    # Use the parsed integer as-is: mapping a falsy 0 to None made the range
    # check below fail with a TypeError instead of the intended ValueError.
    n = args.n

    if not 1 <= n <= 10000:
        raise ValueError('n must be between 1 and 10000, inclusive')

    # connect to zooniverse
    Panoptes.connect(username=zooniverse_config.Zooniverse_USERNAME,
                     password=zooniverse_config.Zooniverse_PASS)
    project = Project.find(zooniverse_config.Project_ID)

    # connection to mongodb
    mongoConn = MongoClient(csh_db_config.DB_HOST + ":" +
                            str(csh_db_config.DB_PORT))
    cshTransDB = mongoConn[csh_db_config.TRANSCRIPTION_DB_NAME]
    cshTransDB.authenticate(csh_db_config.TRANSCRIPTION_DB_USER,
                            csh_db_config.TRANSCRIPTION_DB_PASS)
    cshCollection = cshTransDB[csh_db_config.TRANS_DB_MeetingMinColl]

    # track subject sets being created
    subjectSets = []

    # get the image filenames in a Python list
    with open(filename) as handle:
        filenames = handle.readlines()

    # divide files into consecutive groups of at most n
    filegroups = [filenames[start:start + n]
                  for start in range(0, len(filenames), n)]

    for group in filegroups:
        # NOTE(review): the name has one-second resolution, so groups created
        # within the same second get identical display names -- confirm that
        # this is acceptable.
        displayName = '{:%Y-%b-%d %H:%M:%S}'.format(datetime.datetime.now())

        # create a new subject set
        subjectSet = SubjectSet()
        subjectSet.links.project = project
        subjectSet.display_name = displayName
        subjectSet.save()

        subjectSetId = subjectSet.id
        subjectSets.append(subjectSetId)

        # create a new subject for each file and add to the subject set
        for line in group:
            # remove trailing '\n' character
            image_id = line.rstrip()

            # create a new subject
            subject = Subject()
            subject.links.project = project

            filepath = cshCollection.find_one({'_id':
                                               image_id})['file']['anonPath']
            subject.add_location(filepath)
            subject.metadata['ID'] = image_id
            subject.save()

            # add to subject set
            subjectSet.add(subject)

            # update the record in mongodb
            updateQuery = {
                '$set': {
                    'canCrowdsource': True,
                    'transcription': {
                        'numClassifications': 5,
                        'subjectSetId': subjectSetId,
                        'status': 'sent'
                    }
                }
            }
            cshCollection.find_one_and_update({'_id': image_id}, updateQuery)

    # add subject sets to the workflow
    workflow = project.links.workflows[0]
    workflow.add_subject_sets(subjectSets)

    # print helpful information to the console
    print('{} subject sets created with the following IDs: {}'.format(
        len(subjectSets), subjectSets))
images = [a['src'] for a in soup.find_all("img", {"src": re.compile("gstatic.com")})]
#print images
for img in images:
  raw_img = urllib2.urlopen(img).read()
  #add the directory for your image here
  DIR="images/"
  cntr = len([i for i in os.listdir(DIR) if image_type in i]) + 1
  f = open(DIR + image_type + "_"+ str(cntr)+".jpg", 'wb')
  f.write(raw_img)
  f.close()

print 'Creating image set...'

# create the subject set.
subject_set = SubjectSet()
subject_set.links.project = p
subject_set.display_name = "Images of " + thing + '\'s'
subject_set.save()

print 'Uploading images to Zooniverse...'

# add all images to subject set
for i in range(1,21):
    subject = Subject()
    subject.links.project = p
    subject.add_location('images/' + str(thing) + '_' + str(i)+'.jpg')
    subject.save()
    subject_set.add(subject)

print 'Complete.'
Esempio n. 17
0
        retry = input('Enter "y" to try again, any other key to exit' + '\n')
        if retry.lower() != 'y':
            quit()

#  get new subject name
new_set_name = input('Enter a name for the subject set to use or create:' + '\n')

# find or build destination subject set
try:
    # check if the subject set already exists
    subject_set_new = SubjectSet.where(project_id=proj.id, display_name=new_set_name).next()
except StopIteration:
    # No match: create a new subject set for the new data and link it to the
    # project above.  Only "no result" is handled here, so unrelated
    # failures are no longer silently turned into a new subject set.
    subject_set_new = SubjectSet()
    subject_set_new.links.project = proj
    subject_set_new.display_name = new_set_name
    subject_set_new.save()

#  iterate through the subjects linking them and verifying they link.
k = 0
for sub in add_subjects:
    try:
        subject_set_new.add(sub)
        print(sub, 'linked to new set')
        k += 1
    except Exception:
        # Best effort per subject; Ctrl-C / SystemExit still stop the run.
        print(sub,  'previously linked or did not link correctly')
print(k, ' subjects linked to subject set ', new_set_name, ' in project ', proj_id)

linked = 0
with open(os.getcwd() + os.sep + 'copied_subjects.csv', 'wt') as file:
                m += 1
        # catch and process the last aggregated group
        subjects_to_add = process_aggregation(subject, m, workflow_id, workflow_version,
                                              bin_1, subjects_to_add)

if step_to_analyse == 'Q4':
    proj = Project.find(slug='tedcheese/whales-as-individuals')
try:
    # check if the subject set already exists
    subject_set = SubjectSet.where(project_id=proj.id, display_name=set_name).next()
    print("Add subjects to subject set: {}.".format(subject_set.display_name))
except StopIteration:
    # No match: create a new subject set for the new data and link it to the
    # project above.  Only "no result" is handled here, so unrelated
    # failures are no longer silently turned into a new subject set.
    subject_set = SubjectSet()
    subject_set.links.project = proj
    subject_set.display_name = set_name
    subject_set.save()
    print("Created a new subject set with id: {}.".format(subject_set.id))

# Collect the ids of subjects the export already lists for this subject set.
# NOTE(review): csv values are strings; this comparison only matches if
# ``subject_set.id`` is a string as well -- confirm.
linked_subjects = set()  # use sets to automatically do inclusion test
with open(subject_location) as sub_file:
    r = csv.DictReader(sub_file)
    for sub_row in r:
        if sub_row['subject_set_id'] == subject_set.id:
            linked_subjects.add(sub_row['subject_id'])

# Only subjects not linked yet remain to be added.
add_subjects = (subjects_to_add - linked_subjects)

print("Adding {} subjects to the subject set".format(len(add_subjects)))
k = 0
# iterate through the subjects to advance verifying they load (for now) may use a list later.
                'Title': item_title
            }

            segments.append(segment)
    print('Item segments transformation complete.')
    return segments

# Fetch and transform the segments of the configured Library of Congress item.
segments = transform_item_segments('https://www.loc.gov/item/' + LIBRARY_OF_CONGRESS_ITEM_ID)

Panoptes.connect(username=USERNAME, password=PASSWORD, endpoint=ENDPOINT)
project = Project.find(PROJECT)

# The subject set is named after the Title of the first segment by default;
# feel free to hardcode a name instead.
subject_set = SubjectSet()
subject_set.links.project = project
subject_set.display_name = segments[0]['metadata']['Title']
subject_set.save()

# Save one subject per segment and link each one to the set as it is created.
print('Begin Zooniverse subject upload...')
for segment in segments:
    subject = Subject()
    subject.links.project = project
    subject.add_location(segment['location'])
    subject.metadata.update(segment['metadata'])
    subject.save()
    subject_set.add(subject)

print("Zooniverse subject upload complete.")
# The "Sky Sounds" project has the identifier 13586.
project = Project('13586')

# ------- Image subject set -------
# Connect to the matching subject set, or create a new one if it does not
# exist yet.
try:
    # Look for an existing subject set with this display name.
    subject_set = SubjectSet.where(project_id=project.id,
                                   display_name=image_set_name).next()
except StopIteration:
    # None found: create a subject set for the new data and link it to the
    # project.
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = image_set_name
    subject_set.save()

# Add the samples to the subject set: one subject per manifest row, with the
# 'lc' and 'sp' columns as its two locations.
with open(manifest_images_file, 'r') as mani_file:
    print('Uploading image_set')
    r = csv.DictReader(mani_file)
    for line in r:
        subject = Subject()
        subject.links.project = project
        subject.add_location(line['lc'])
        subject.add_location(line['sp'])
        subject.metadata['subject_id'] = line['id']
        subject.save()
        # The subject is linked to the set by id right after it is saved.
        subject_set.add(subject.id)
Esempio n. 21
0
def create(project_id, display_name):
    # Create and save a subject set linked to ``project_id`` under the given
    # display name, then print it through ``echo_subject_set``.
    new_set = SubjectSet()
    new_set.links.project = project_id
    new_set.display_name = display_name
    new_set.save()

    echo_subject_set(new_set)
Esempio n. 22
0
    def upload_chunks(self,
                      chunks: str,
                      project_id: int,
                      set_name: str,
                      zooniverse_login="",
                      zooniverse_pwd="",
                      amount: int = 1000,
                      ignore_errors: bool = False,
                      **kwargs):
        """Uploads ``amount`` audio chunks from the CSV dataframe `chunks` to a zooniverse project.

        :param chunks: path to the chunk CSV dataframe
        :type chunks: str
        :param project_id: zooniverse project id
        :type project_id: int
        :param set_name: name of the subject set
        :type set_name: str
        :param zooniverse_login: zooniverse login. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_LOGIN`` instead, defaults to ''
        :type zooniverse_login: str, optional
        :param zooniverse_pwd: zooniverse password. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_PWD`` instead, defaults to ''
        :type zooniverse_pwd: str, optional
        :param amount: amount of chunks to upload, defaults to 1000
        :type amount: int, optional
        :param ignore_errors: when a chunk fails to save, skip it and carry on with the next one instead of halting the upload, defaults to False
        :type ignore_errors: bool, optional
        :raises Exception: if the chunk metadata cannot be read (the underlying error is chained as ``__cause__``)
        """

        self.chunks_file = chunks
        self.get_credentials(zooniverse_login, zooniverse_pwd)

        metadata_location = self.chunks_file
        try:
            self.chunks = pd.read_csv(metadata_location, index_col="index")
        except Exception as exc:
            # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
            # propagate untouched; the root cause is kept via chaining.
            raise Exception("cannot read chunk metadata from {}.".format(
                metadata_location)) from exc

        assert_dataframe("chunks", self.chunks)
        assert_columns_presence(
            "chunks",
            self.chunks,
            {"recording_filename", "onset", "offset", "uploaded", "mp3"},
        )

        from panoptes_client import Panoptes, Project, Subject, SubjectSet

        Panoptes.connect(username=self.zooniverse_login,
                         password=self.zooniverse_pwd)
        zooniverse_project = Project(project_id)

        subjects_metadata = []

        # Reuse a subject set with the requested name when the project has
        # one (the last match wins); otherwise create it.
        subject_set = None

        for ss in zooniverse_project.links.subject_sets:
            if ss.display_name == set_name:
                subject_set = ss

        if subject_set is None:
            subject_set = SubjectSet()
            subject_set.links.project = zooniverse_project
            subject_set.display_name = set_name
            subject_set.save()

        subjects = []

        # Elementwise comparison on the pandas column: select the first
        # ``amount`` chunks that are not uploaded yet.
        chunks_to_upload = self.chunks[self.chunks["uploaded"] == False].head(
            amount)
        chunks_to_upload = chunks_to_upload.to_dict(orient="index")

        if not chunks_to_upload:
            print("nothing left to upload.")
            return

        for chunk_index, chunk in chunks_to_upload.items():
            print("uploading chunk {} ({},{})".format(
                chunk["recording_filename"], chunk["onset"], chunk["offset"]))

            subject = Subject()
            subject.links.project = zooniverse_project
            subject.add_location(
                os.path.join(os.path.dirname(self.chunks_file), "chunks",
                             chunk["mp3"]))
            subject.metadata["date_extracted"] = chunk["date_extracted"]

            try:
                subject.save()
            except Exception as e:
                print("failed to save chunk {}. an exception has occured:\n{}".
                      format(chunk_index, str(e)))
                print(traceback.format_exc())

                # Use the ``ignore_errors`` parameter; the previous
                # ``args.ignore_errors`` referenced an undefined name and
                # raised NameError on the first failed save.
                if ignore_errors:
                    continue
                else:
                    print("subject upload halting here.")
                    break

            subjects.append(subject)

            # Record where the chunk went so the CSV can be updated.
            chunk["index"] = chunk_index
            chunk["zooniverse_id"] = str(subject.id)
            chunk["project_id"] = str(project_id)
            chunk["subject_set"] = str(subject_set.display_name)
            chunk["uploaded"] = True
            subjects_metadata.append(chunk)

        if not subjects:
            return

        # Subjects saved before a halt are still linked and recorded.
        subject_set.add(subjects)

        self.chunks.update(pd.DataFrame(subjects_metadata).set_index("index"))

        self.chunks.to_csv(self.chunks_file)
0
# Connect to Zooniverse and look up the project; a failure is appended to the
# log file with a timestamp.
try:
    Panoptes.connect(username=zcfg.login['user'], password=zcfg.login['pass'])
    project = Project.find("6307")
except Exception:
    # ``with`` closes the log file even if the write fails; ``f`` stays bound
    # (closed) afterwards, exactly as before.
    with open(logfile, "a") as f:
        t = time.localtime()

        f.write('Unable to connect to Zooniverse: '+time.strftime("%D:%H:%M:%S",t)+'\n')
    # NOTE(review): execution continues after a failed connection; unless
    # ``project`` is defined elsewhere, the assignment to
    # ``subject_set.links.project`` below will raise NameError -- confirm
    # whether the script should stop here instead.


subject_set = SubjectSet()
s = Subject()

subject_set.links.project = project
subject_set.display_name = 'Tutorial subject set 2'

# Every path matching ``path`` is a candidate upload.
images = glob.glob(path)
new_subjects = []

for img in images:
    try:
        s = Subject()
        s.links.project = project
        # manifest file
        if os.path.splitext(img)[1] == ".csv":   # upload manifest info.... not sure how this will be set up after second step
            # move csv to complete images folder
            shutil.copy(f, completed_images)
            # make dict out of csv file for upload
            manifest = csv.DictReader(open(img))
            s.metadata.update(manifest)
Esempio n. 24
0
def create_subjects_and_link_to_project(proto_subjects,
                                        project_id,
                                        subject_set_id,
                                        subject_set_name=None):
    """Upload subjects that are not yet in a subject set and link them to it.

    Finds the project, then either creates a new subject set (when
    ``subject_set_id`` is None) or looks up the existing one. Each entry of
    ``proto_subjects`` whose metadata is not already present in the subject
    set is saved as a new subject and added to the set.

    Keyword Arguments:
    proto_subjects -- dictionary mapping subject filepath+filename to its
        associated metadata
    project_id -- identifier to find and link with the project
    subject_set_id -- identifier for the subject set of interest, or None to
        create a new subject set
    subject_set_name -- display name for the subject set. When creating a set
        and no name is given, a dated name with a random suffix is generated.
        When ``subject_set_id`` is given, the name (if provided) must match
        the existing set's display name.

    Returns:
    -1 if ``subject_set_name`` does not match the display name of the
    existing subject set (nothing is uploaded); otherwise None.
    """

    # get the project object
    project = Project.find(project_id)

    # set up subject_set
    if subject_set_id is None:
        subject_set = SubjectSet()  # create empty subject_set
        subject_set.links.project = project

        # if not defined generate a random subject set name to avoid an
        # error when a set with that name already exists
        if subject_set_name is None:
            subject_set_name = 'subject_set_{:02d}_{:02d}_{:04d}_{}'.format(
                date.day, date.month, date.year,
                ''.join(generate_random_str()))
        print("will create a subject set called: {}".format(subject_set_name))
        subject_set.display_name = subject_set_name
        subject_set.save()
        project.reload()
    else:
        # find() is called on the class; no throwaway instance is needed
        subject_set = SubjectSet.find(subject_set_id)
        existing_subject_set_name = subject_set.display_name

        # if a name was supplied, check that it matches the name of the
        # subject set selected by id
        if (subject_set_name is not None
                and existing_subject_set_name != subject_set_name):
            print(
                "your chosen subject set name does not match the existing name: {}, {}"
                .format(subject_set_name, existing_subject_set_name))
            return -1

        subject_set_name = existing_subject_set_name
        print("add to existing subject set: {}".format(subject_set_name))

    # Create a list of the existing subject metadata
    meta_list = []
    print("existing subjects:")
    for subject in subject_set.subjects:
        print(subject.id, subject.metadata)
        meta_list.append(subject.metadata)

    # Only subjects whose metadata is not already in the set are uploaded
    print("new subjects:")
    new_subjects = []
    for filename, metadata in proto_subjects.items():

        # Plain equality-based membership test; avoids coercing the metadata
        # dicts into NumPy object arrays just to compare them.
        if metadata in meta_list:
            print("{}, subject already in set".format(metadata))
            # In this case we skip over the subject that already exists.
            # N.B. you may want to remove an existing subject and update it with the new one
            continue

        # Otherwise we can add the subject to the new subject list
        subject = Subject()

        subject.links.project = project
        subject.add_location(filename)

        subject.metadata.update(metadata)

        subject.save()
        new_subjects.append(subject)
        print("{}, new subject add to list".format(metadata))

    print("new subjects to add: {}".format(new_subjects))

    # add the new subjects to the subject set; skip the API call entirely
    # when every proto subject was already present
    if new_subjects:
        subject_set.add(new_subjects)

    return
def upload_manifest_to_galaxy_zoo(
        subject_set_name,
        manifest,
        project_id='5733',  # default to main GZ project
        login_loc='zooniverse_login.txt'):
    """
    Save manifest (set of galaxies with metadata prepared) to Galaxy Zoo

    Subjects are saved and linked to the subject set in blocks of 100. The
    subject set named ``subject_set_name`` is reused if the project already
    has one, otherwise it is created.

    Args:
        subject_set_name (str): name for subject set. If it contains 'TEST',
            nothing is uploaded and the manifest is returned immediately.
        manifest (list): containing dicts of form {png_loc: img.png, key_data: {metadata_col: metadata_value}}
        project_id (str): panoptes project id e.g. '5733' for Galaxy Zoo, '6490' for mobile
        login_loc (str): path to the file holding the Zooniverse login
            details; must exist (checked even in testing mode)

    Returns:
        list: the ``manifest`` argument, unchanged (for debugging only)
    """
    assert os.path.exists(login_loc)
    if 'TEST' in subject_set_name:
        logging.warning('Testing mode detected - not uploading!')
        return manifest

    known_projects = {
        '5733': 'Galaxy Zoo project 5733',
        '6490': 'mobile app project 6490',
        '8751': 'staging project 8751',
    }
    logging.info('Uploading to {}'.format(
        known_projects.get(project_id,
                           'unknown project {}'.format(project_id))))

    # Important - don't commit the password!
    zooniverse_login = read_data_from_txt(login_loc)
    Panoptes.connect(**zooniverse_login)

    project = Project.find(project_id)

    # check if subject set already exists
    subject_set = None
    subject_sets = SubjectSet.where(project_id=project_id)
    for candidate_subject_set in subject_sets:
        if candidate_subject_set.raw['display_name'] == subject_set_name:
            # use if it already exists
            subject_set = candidate_subject_set
    if subject_set is None:  # make a new one if not
        subject_set = SubjectSet()
        subject_set.links.project = project
        subject_set.display_name = subject_set_name
        subject_set.save()

    # upload in async blocks, to avoid huge join at end
    manifest_block_size = 100

    # The context manager closes the progress bar even if an upload fails.
    with tqdm(total=len(manifest), unit=' subjects uploaded') as pbar:
        save_subject_partial = functools.partial(save_subject,
                                                 project=project,
                                                 pbar=pbar)

        # range() stops before len(manifest), so no empty trailing block is
        # sent when the manifest is empty or an exact multiple of the block
        # size.
        for block_start in range(0, len(manifest), manifest_block_size):
            manifest_block = manifest[block_start:block_start +
                                      manifest_block_size]

            new_subjects = []
            with Subject.async_saves():
                for manifest_entry in manifest_block:
                    new_subjects.append(save_subject_partial(manifest_entry))

            subject_set.add(new_subjects)
            # log the count, not the repr of every subject in the block
            logging.info('{} subjects linked'.format(len(new_subjects)))

    return manifest  # for debugging only
Esempio n. 26
0
def create_subject_set(project_id: int, name: str):
    """Create, save and return a new subject set linked to a project.

    Args:
        project_id: id used to construct the ``Project`` the set is linked to.
        name: display name given to the new subject set.

    Returns:
        The saved ``SubjectSet``.
    """
    owning_project = Project(project_id)

    new_set = SubjectSet()
    new_set.display_name = name
    new_set.links.project = owning_project
    new_set.save()
    return new_set