# Imports assumed by this excerpt (they are not shown in the original snippet).
import time
import requests
from requests.exceptions import ConnectionError, HTTPError, Timeout
from panoptes_client import Project
from panoptes_client.panoptes import PanoptesAPIException


def createZooniverseProject(projName, projDesc, primLang, flag_hidden):

    print('--- --- --- ---')
    print('Establishing connection to Zooniverse and creating project')

    notSaved = True
    saveCheck = 0
    project = None
    connected = False

    while not connected:
        url = 'http://zooniverse.org/'
        print('Attempting connection.')
        try:
            response = requests.get(url, timeout=0.2)
        except ConnectionError as ce:
            print(ce)
        except HTTPError as he:
            print(he)
        except Timeout as to:
            print(to)
        else:
            print(response)
            connected = True

    while (notSaved and (saveCheck < 5)):
        notSaved = False
        #Make a new project
        project = Project()

        #Project name
        #tutorial_project.display_name = ('{}_test'.format(now))
        project.display_name = projName
        saveCheck += 1

        #Project description
        project.description = projDesc

        #Project language
        project.primary_language = primLang

        #Project visibility
        project.private = flag_hidden

        try:
            project.save()
        except PanoptesAPIException as e:
            print('!!! {} , Waiting 10 seconds...'.format(e))
            notSaved = True
            for i in range(0, 10):
                print('... Waiting {}...'.format(i))
                time.sleep(1)
            project.delete()

    if notSaved:
        print('Project could not be saved after {} attempts.'.format(saveCheck))
    else:
        print('Project successfully created.')

    return project
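
# Usage sketch (not part of the original example): connect to Panoptes first,
# since Project.save() requires an authenticated session. The credentials and
# project details below are placeholders.
from panoptes_client import Panoptes

Panoptes.connect(username='my_username', password='my_password')
new_project = createZooniverseProject(
    projName='My Test Project',
    projDesc='Throwaway project used to test the upload pipeline.',
    primLang='en',
    flag_hidden=True)  # keep the project private while testing
print(new_project.id)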
Example #2
def create(display_name, description, primary_language, public, quiet):
    """
    Creates a new project.

    Prints the project ID and name of the new project.
    """

    project = Project()
    project.display_name = display_name
    project.description = description
    project.primary_language = primary_language
    project.private = not public
    project.save()

    if quiet:
        click.echo(project.id)
    else:
        echo_project(project)
def pushNewSubjectSet(args, customArgs, projID):

    args['F_livePost'] = True

    connection = panoptesConnect(args['username'], args['password'])
    args['zooniverseConnection'] = connection

    #Get existing project
    project = Project(projID)
    if project is None:
        print('Could not find this project')
        return None
    print(project.display_name)
    args['project'] = project

    #Create new subject set
    subjectSet = createSubjectSet(args['subjectSetTitle'], args['project'])
    args['subjectSet'] = subjectSet

    #Create new subjects and populate project with filled subject set
    createSubjects(args, customArgs)

    return args
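
# Usage sketch (not part of the original example): pushNewSubjectSet relies on
# helpers defined elsewhere in the original project (panoptesConnect,
# createSubjectSet, createSubjects). The credentials, set title and project id
# below are placeholders.
example_args = {
    'username': 'my_username',
    'password': 'my_password',
    'subjectSetTitle': 'my_new_subject_set',
}
example_custom_args = {}  # forwarded to createSubjects unchanged
pushNewSubjectSet(example_args, example_custom_args, projID=1234)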
Example #4
    def retrieve_classifications(self,
                                 destination: str,
                                 project_id: int,
                                 zooniverse_login: str = "",
                                 zooniverse_pwd: str = "",
                                 chunks: List[str] = [],
                                 **kwargs):
        """Retrieve classifications from Zooniverse as a CSV dataframe.
        They will be matched with the original chunk metadata if the path to
        one or more chunk metadata files is provided.

        :param destination: output CSV dataframe destination
        :type destination: str
        :param project_id: zooniverse project id
        :type project_id: int
        :param zooniverse_login: zooniverse login. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_LOGIN`` instead, defaults to ''
        :type zooniverse_login: str, optional
        :param zooniverse_pwd: zooniverse password. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_PWD`` instead, defaults to ''
        :type zooniverse_pwd: str, optional
        :param chunks: the list of chunk metadata files to match the classifications to. If provided, only the classifications that have a match will be returned.
        :type chunks: List[str], optional
        """
        self.get_credentials(zooniverse_login, zooniverse_pwd)

        from panoptes_client import Panoptes, Project, Classification
        Panoptes.connect(username=self.zooniverse_login,
                         password=self.zooniverse_pwd)
        project = Project(project_id)

        answers_translation_table = []
        for workflow in project.links.workflows:
            workflow_id = workflow.id
            for task_id in workflow.tasks:
                n = 0
                for answer in workflow.tasks[task_id]["answers"]:
                    answers_translation_table.append({
                        "workflow_id":
                        str(workflow_id),
                        "task_id":
                        str(task_id),
                        "answer_id":
                        str(n),
                        "answer":
                        answer["label"],
                    })
                    n += 1

        answers_translation_table = pd.DataFrame(answers_translation_table)

        classifications = []
        for c in Classification.where(scope="project",
                                      page_size=1000,
                                      project_id=project_id):
            classifications.append(c.raw)

        classifications = pd.DataFrame(classifications)
        classifications["user_id"] = classifications["links"].apply(
            lambda s: s["user"])
        classifications["subject_id"] = (classifications["links"].apply(
            lambda s: s["subjects"][0]).astype(int))
        classifications["workflow_id"] = classifications["links"].apply(
            lambda s: s["workflow"])
        classifications["tasks"] = classifications["annotations"].apply(
            lambda s: [(str(r["task"]), str(r["value"])) for r in s])
        classifications = classifications.explode("tasks")
        classifications["task_id"] = classifications["tasks"].str[0]
        classifications["answer_id"] = classifications["tasks"].str[1]
        classifications.drop(columns=["tasks"], inplace=True)

        classifications = classifications[[
            "id", "user_id", "subject_id", "task_id", "answer_id",
            "workflow_id"
        ]]
        classifications = classifications.merge(
            answers_translation_table,
            left_on=["workflow_id", "task_id", "answer_id"],
            right_on=["workflow_id", "task_id", "answer_id"],
        )

        if chunks:
            chunks = pd.concat([pd.read_csv(f) for f in chunks])

            classifications = classifications.merge(chunks,
                                                    left_on="subject_id",
                                                    right_on="zooniverse_id")

        classifications.set_index("id").to_csv(destination)
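
# Usage sketch (not part of the original example): assumes `pipeline` is an
# instance of the enclosing class and that ZOONIVERSE_LOGIN / ZOONIVERSE_PWD
# are set in the environment. The project id and file paths are placeholders.
#
#   pipeline.retrieve_classifications(
#       destination='classifications.csv',
#       project_id=1234,
#       chunks=['chunks/chunks.csv'])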
Example #5
    def upload_chunks(self,
                      chunks: str,
                      project_id: int,
                      set_name: str,
                      zooniverse_login="",
                      zooniverse_pwd="",
                      amount: int = 1000,
                      ignore_errors: bool = False,
                      **kwargs):
        """Uploads ``amount`` audio chunks from the CSV dataframe `chunks` to a zooniverse project.

        :param chunks: path to the chunk CSV dataframe
        :type chunks: str
        :param project_id: zooniverse project id
        :type project_id: int
        :param set_name: name of the subject set
        :type set_name: str
        :param zooniverse_login: zooniverse login. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_LOGIN`` instead, defaults to ''
        :type zooniverse_login: str, optional
        :param zooniverse_pwd: zooniverse password. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_PWD`` instead, defaults to ''
        :type zooniverse_pwd: str, optional
        :param amount: number of chunks to upload, defaults to 1000
        :type amount: int, optional
        :param ignore_errors: keep uploading the remaining chunks when a subject fails to save, defaults to False
        :type ignore_errors: bool, optional
        """

        self.chunks_file = chunks
        self.get_credentials(zooniverse_login, zooniverse_pwd)

        metadata_location = os.path.join(self.chunks_file)
        try:
            self.chunks = pd.read_csv(metadata_location, index_col="index")
        except Exception as exc:
            raise Exception("cannot read chunk metadata from {}.".format(
                metadata_location)) from exc

        assert_dataframe("chunks", self.chunks)
        assert_columns_presence(
            "chunks",
            self.chunks,
            {"recording_filename", "onset", "offset", "uploaded", "mp3"},
        )

        from panoptes_client import Panoptes, Project, Subject, SubjectSet

        Panoptes.connect(username=self.zooniverse_login,
                         password=self.zooniverse_pwd)
        zooniverse_project = Project(project_id)

        subjects_metadata = []
        uploaded = 0

        subject_set = None

        for ss in zooniverse_project.links.subject_sets:
            if ss.display_name == set_name:
                subject_set = ss

        if subject_set is None:
            subject_set = SubjectSet()
            subject_set.links.project = zooniverse_project
            subject_set.display_name = set_name
            subject_set.save()

        subjects = []

        chunks_to_upload = self.chunks[self.chunks["uploaded"] == False].head(
            amount)
        chunks_to_upload = chunks_to_upload.to_dict(orient="index")

        if len(chunks_to_upload) == 0:
            print("nothing left to upload.")
            return

        for chunk_index in chunks_to_upload:
            chunk = chunks_to_upload[chunk_index]

            print("uploading chunk {} ({},{})".format(
                chunk["recording_filename"], chunk["onset"], chunk["offset"]))

            subject = Subject()
            subject.links.project = zooniverse_project
            subject.add_location(
                os.path.join(os.path.dirname(self.chunks_file), "chunks",
                             chunk["mp3"]))
            subject.metadata["date_extracted"] = chunk["date_extracted"]

            try:
                subject.save()
            except Exception as e:
                print("failed to save chunk {}. an exception has occured:\n{}".
                      format(chunk_index, str(e)))
                print(traceback.format_exc())

                if ignore_errors:
                    continue
                else:
                    print("subject upload halting here.")
                    break

            subjects.append(subject)

            chunk["index"] = chunk_index
            chunk["zooniverse_id"] = str(subject.id)
            chunk["project_id"] = str(project_id)
            chunk["subject_set"] = str(subject_set.display_name)
            chunk["uploaded"] = True
            subjects_metadata.append(chunk)

        if len(subjects) == 0:
            return

        subject_set.add(subjects)

        self.chunks.update(pd.DataFrame(subjects_metadata).set_index("index"))

        self.chunks.to_csv(self.chunks_file)
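
# Usage sketch (not part of the original example): assumes `pipeline` is an
# instance of the enclosing class and that Zooniverse credentials are available
# through ZOONIVERSE_LOGIN / ZOONIVERSE_PWD. Paths, project id and set name are
# placeholders.
#
#   pipeline.upload_chunks(
#       chunks='chunks/chunks.csv',
#       project_id=1234,
#       set_name='batch_2021_06',
#       amount=500,
#       ignore_errors=True)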
# connect to zooniverse
Panoptes.connect(username=zooniverse_config.Zooniverse_USERNAME,
                 password=zooniverse_config.Zooniverse_PASS)
project = Project.find(zooniverse_config.Project_ID)

# connection to mongodb
mongoConn = MongoClient(csh_db_config.DB_HOST + ":" +
                        str(csh_db_config.DB_PORT))
cshTransDB = mongoConn[csh_db_config.TRANSCRIPTION_DB_NAME]
cshTransDB.authenticate(csh_db_config.TRANSCRIPTION_DB_USER,
                        csh_db_config.TRANSCRIPTION_DB_PASS)
cshCollection = cshTransDB[csh_db_config.TRANS_DB_MeetingMinColl]
cshSubjectSets = cshTransDB[csh_db_config.TRANS_DB_SubjectSets]

classification_export = Project(
    zooniverse_config.Project_ID).get_export('classifications')
classification = classification_export.content.decode('utf-8')

# Traverse each row of the classifications CSV export from Zooniverse; the
# annotation and subject fields are JSON-encoded.
for row in csv.DictReader(io.StringIO(classification)):

    annotations = json.loads(row['annotations'])
    subject_data = json.loads(row['subject_data'])
    transcription_question_1 = ''
    transcription_text_1 = ''
    transcription_question_2 = ''
    transcription_text_2 = ''
    transcription_filename = ''

    subject_id = row['subject_ids']
    subject_id = str(subject_id)
Example #7
    if save:
        build_file += build_part

    with open(project_file, 'r') as p_file:
        r = csv.DictReader(p_file)
        project_list = []
        for row in r:
            project_list.append(row['projects'])

    i = 0
    for prjct in project_list:
        sys.stdout.write('processing..')
        sys.stdout.flush()
        try:
            build_part = "{:<8},{}".format(prjct,
                                           Project(
                                               int(prjct)).display_name) + '\n'
            build_part += "{:<12},{:<14},{:<28},{:<28},{:<10},{:12},{}".format(
                'Project_id', 'Workflow_id', 'Created date', 'Finished date',
                'Subjects', 'Retirement', 'Workflow name') + '\n'
            for workflow_id, project_id in all_workflows:
                i += 1
                if i % 5 == 0:
                    sys.stdout.write('.')
                    sys.stdout.flush()
                if prjct == project_id:
                    wrkflw = Workflow(int(workflow_id))
                    finished_at = wrkflw.finished_at
                    if finished_at is None:
                        finished_at = ' '
                    build_part += u"{:<12},{:<14},{:<28},{:<28},{:<10},{:<12},{}".format(
                        prjct, wrkflw.id, wrkflw.created_at, finished_at,
Example #8
PROJECT_ID = 6767
PROCESSED_SUBJECTS_FILE = 'processed_subjects.txt'
PROCESSED_SETS_FILE = 'processed_sets.txt'


with open('config.yaml') as config_f:
    config = yaml.load(config_f, Loader=yaml.FullLoader)

if os.path.isfile(PROCESSED_SETS_FILE):
    with open(PROCESSED_SETS_FILE) as processed_f:
        processed_sets = { s.strip() for s in processed_f.readlines() }
else:
    processed_sets = set()

Panoptes.connect(**config)
project = Project(PROJECT_ID)

with open(PROCESSED_SETS_FILE, 'a') as processed_sets_f:
    for subject_set in project.links.subject_sets:
        if subject_set.id in processed_sets:
            continue
        with ChargingBar(
            'Updating {}'.format(subject_set.display_name),
            max=subject_set.set_member_subjects_count,
            suffix='%(percent).1f%% %(eta_td)s'
        ) as bar:
            with Subject.async_saves():
                for subject in Subject.where(subject_set_id=subject_set.id, page_size=100):
                    bar.next()
                    if '!CERiT' in subject.metadata:
                        continue
Example #9
def create_subject_set(project_id: int, name: str):
    subject_set = SubjectSet()
    subject_set.links.project = Project(project_id)
    subject_set.display_name = name
    subject_set.save()
    return subject_set
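
# Usage sketch (not part of the original example): authenticate first, then
# create the set and link it to the project. Credentials, project id and set
# name are placeholders.
from panoptes_client import Panoptes

Panoptes.connect(username='my_username', password='my_password')
new_set = create_subject_set(1234, 'my_subject_set')
print(new_set.id)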
Example #10
def main():
    # connect to zooniverse
    Panoptes.connect(username=zooniverse_config.Zooniverse_USERNAME, password=zooniverse_config.Zooniverse_PASS)
    project = Project.find(zooniverse_config.Project_ID)

    # connection to mongodb
    mongoConn = MongoClient(csh_db_config.DB_HOST + ":" + str(csh_db_config.DB_PORT))
    cshTransDB = mongoConn[csh_db_config.TRANSCRIPTION_DB_NAME]
    cshTransDB.authenticate(csh_db_config.TRANSCRIPTION_DB_USER,
                            csh_db_config.TRANSCRIPTION_DB_PASS)
    cshCollection = cshTransDB[csh_db_config.TRANS_DB_MeetingMinColl]
    cshSubjectSets = cshTransDB[csh_db_config.TRANS_DB_SubjectSets]

    classification_export = Project(zooniverse_config.Project_ID).get_export('classifications')
    classification = classification_export.content.decode('utf-8')

    # keep track of the number of classifications
    num_classifications = 0

    # traverse each row of the classifications export and map its fields to
    # the variables below
    for row in csv.DictReader(io.StringIO(classification)):
        annotations = json.loads(row['annotations'])
        subject_data = json.loads(row['subject_data'])
        transcription_question_1 = ''
        transcription_text_1 = ''
        transcription_question_2 = ''
        transcription_text_2 = ''
        transcription_filename = ''

        subject_id = row['subject_ids']
        subject_id = str(subject_id)

        # parse the JSON output from Zooniverse into individual fields
        for task in annotations:
            try:
                if 'Is there a word in this image?' in task['task_label']:
                    if task['value'] is not None:
                        transcription_question_1 = str(task['task_label'])
                        transcription_text_1 = str(task['value'])
                        num_classifications += 1
            except KeyError:
                try:
                    if 'Please type the word(s) that appears in this image' in task['task_label']:
                        if task['value'] is not None:
                            transcription_question_2 = str(task['task_label'])
                            transcription_text_2 = str(task['value'])
                except KeyError:
                    continue

            # retrieve and update the record from MongoDB
            updateQuery = {
                '$set':{
                    'responses': [{
                        'labellerId': row['user_id'],
                        'type'      : transcription_text_1,
                        'label'     : transcription_text_2
                    }],
                    'transcription': {
                        'status'   : 'done'
                    }
                }
            }
            record = cshCollection.find_one_and_update({'_id': transcription_filename}, updateQuery)

    print('{} classifications retrieved from Zooniverse and records updated in MongoDB'.format(num_classifications))
except:
    print('ERROR: Could not read the configuration file.')
    sys.exit(1)

#  modify path and file name as needed:
manifest_images_file = manifest_path + "manifest_images_" + month + ".csv"
manifest_sounds_file = manifest_path + "manifest_sounds_" + month + ".csv"

image_set_name = 'image_set_' + month
audio_set_name = 'audio_set_' + month

# Connect to Panoptes
Panoptes.connect(username=user, password=passwd)

#  The "Sky Sounds" project has the identifier 13586.
project = Project('13586')

# ------- Image subject set -------
# Connect to the matching subject set, or create a new one if it does not
# exist yet.
try:
    # Check whether the subject set already exists.
    subject_set = SubjectSet.where(project_id=project.id,
                                   display_name=image_set_name).next()
except StopIteration:
    # Create a new subject set for the new data and link it to the project.
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = image_set_name
    subject_set.save()
    logger.info("Imported Manifest file {} with {} records".format(
        args['manifest'], len(mani.keys())))

    # read Zooniverse credentials
    config = read_config_file(args['password_file'])

    ###################################
    # Create Zooniverse Connection
    # Fetch/Create SubjectSet
    ###################################

    # connect to panoptes
    connect_to_panoptes()

    # Get Project
    my_project = Project(args['project_id'])

    # get or create a subject set
    if args['subject_set_id'] is not None:
        my_set = get_subject_set(args['subject_set_id'],
                                 args['subject_set_name'])
    else:
        my_set = uploader.create_subject_set(my_project,
                                             args['subject_set_name'])
        logger.info("Created new subject set with id {}, name {}".format(
            my_set.id, my_set.display_name))

    ###################################
    # Create Stats Variables
    ###################################
subject_data_file = 'WO399_11Jun2020-trunc.tsv'
subject_file_list = 'wo_399_file_list.txt'
subject_file_root = credentials.subject_file_root
subject_file_old_root_re = ''
if credentials.subject_file_old_root != '':
    subject_file_old_root_re = re.compile(credentials.subject_file_old_root)
file_inventory = defaultdict(list)

project_id = 11982
# we are only interested in the Piece-level records in the catalogue export.
# the following regex matches Piece references (and lower) only.
piece_ref_re = re.compile(r'WO 399/(\d+)')
file_path_extraction_re = re.compile(r'wo\\399\\(\d+)')

Panoptes.connect(username=credentials.username, password=credentials.password)
project = Project(project_id)

# Read the subject_data_file to get the names and docrefs of the documents
# Identify the images that belong to a subject set and upload them


def create_subject_set(docref, name):
    print("Attempting to create a subject set via the Zooniverse API")
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = docref + " - " + name
    subject_set.save()
    return subject_set


with open(subject_file_list, 'r') as f: