def run(cyto_job, parameters):
    """Re-create the annotations of one term, then delete the originals.

    Fetches the annotations of project ``cyto_job.project`` filtered on the
    term ``parameters.terms_list``, saves a new ``Annotation`` built from the
    location, image, term and project of each of them, and finally deletes
    the fetched annotations one by one while reporting progress on
    ``cyto_job.job``.

    :param cyto_job: Object exposing ``job`` (receives progress updates) and
        ``project`` (the project whose annotations are processed)
    :param parameters: Object exposing ``terms_list``, used as the term filter
    """
    job = cyto_job.job
    project_id = cyto_job.project
    term_id = parameters.terms_list

    logging.info("########### Parameters = %s", parameters)
    logging.info("########### Term %s", term_id)
    logging.info("########### Project %s", project_id)

    annotations = AnnotationCollection()
    annotations.project = project_id
    annotations.terms = [term_id]
    annotations.fetch()

    count = len(annotations)
    # An empty result set must not divide by zero; with nothing to delete the
    # per-item increment is irrelevant anyway.
    progress_delta = 1.0 / (1.50 * count) if count else 0.0

    job.update(
        progress=0,
        statusComment=f"Converting annotations from project {project_id}")

    new_annotations = AnnotationCollection()
    for a in annotations:
        # Some annotations come back without their geometry; load it on demand.
        if a.location is None:
            a.fetch()
        new_annotations.append(
            Annotation(a.location, a.image, a.term, a.project))
    if count:
        # Nothing to send when no annotation matched the filter.
        new_annotations.save(chunk=None)

    # Continue from the value reported here so that the progress shown on the
    # job never moves backwards during the deletion phase.
    progress = 0.25
    job.update(progress=progress, statusComment="Deleting old annotations...")

    for a in annotations:
        a.delete()
        progress += progress_delta
        job.update(progress=progress)
Exemple #2
0
def preprocess(cytomine, working_path, id_project, id_terms=None, id_tags_for_images=None):
    """
    Get data from Cytomine in order to train YOLO.

    Creates the working, image and annotation directories if needed, writes
    the class names file, dumps every image of the project and writes one
    annotation file per image.

    :param cytomine: The Cytomine client (not used directly by this function)
    :param working_path: The path where files will be stored
    :param id_project: The Cytomine project ID used to get data
    :param id_terms: The Cytomine term IDS used to get data, as a
        comma-separated string; when empty, all terms of the project are used
    :param id_tags_for_images: The Cytomine tags IDS associated to images used to get data
    :return:
        classes_filename: The name of the file with classes
        image_filenames: A list of image filenames
        annotation_filenames: A list of filenames with annotations in YOLO format
    """
    images_path = os.path.join(working_path, IMG_DIRECTORY)
    annotations_path = os.path.join(working_path, ANNOTATION_DIRECTORY)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    for directory in (working_path, images_path, annotations_path):
        os.makedirs(directory, exist_ok=True)

    terms = TermCollection().fetch_with_filter("project", id_project)
    if id_terms:
        wanted_term_ids = {int(id_term) for id_term in id_terms.split(',')}
        filtered_terms = [term for term in terms if term.id in wanted_term_ids]
    else:
        filtered_terms = terms
    # The class index of a term is its position in the classes file.
    terms_indexes = {term.id: i for i, term in enumerate(filtered_terms)}

    # https://github.com/eriklindernoren/PyTorch-YOLOv3#train-on-custom-dataset
    # Write obj.names
    # Files are opened in text mode, so "\n" is the right terminator:
    # writing os.linesep would produce "\r\r\n" on Windows.
    classes_filename = os.path.join(working_path, CLASSES_FILENAME)
    with open(classes_filename, 'w') as f:
        for term in filtered_terms:
            f.write(term.name + "\n")

    # Download images
    image_filenames = []
    image_tags = id_tags_for_images if id_tags_for_images else None
    images = ImageInstanceCollection(tags=image_tags).fetch_with_filter("project", id_project)
    for image in images:
        image.dump(os.path.join(images_path, "{id}.png"), override=False)
        image_filenames.append(image.filename)

    # Create annotation files
    annotation_filenames = []
    annotation_term_ids = [t.id for t in filtered_terms] if id_terms else None
    for image in images:
        annotations = AnnotationCollection()
        annotations.image = image.id
        annotations.terms = annotation_term_ids
        annotations.showWKT = True
        annotations.showTerm = True
        annotations.fetch()

        filename = os.path.join(annotations_path, "{}.txt".format(image.id))
        with open(filename, 'w') as f:
            for annotation in annotations:
                geometry = wkt.loads(annotation.location)
                x, y, w, h = geometry_to_yolo(geometry, image.width, image.height)
                for term_id in annotation.term:
                    # An annotation may also carry terms outside the selected
                    # set; those have no class index and are skipped instead
                    # of raising KeyError.
                    class_index = terms_indexes.get(term_id)
                    if class_index is None:
                        continue
                    # <object-class> <x_center> <y_center> <width> <height>
                    f.write("{} {:.12f} {:.12f} {:.12f} {:.12f}\n".format(class_index, x, y, w, h))
        annotation_filenames.append(filename)

    return classes_filename, image_filenames, annotation_filenames
Exemple #3
0
def main(argv):
    """Compute per-image, per-term annotation area statistics and upload them as CSV.

    Runs inside a ``CytomineJob`` built from the command line. The job
    parameters select the project, terms and images, and optionally the users
    and jobs whose annotations are considered. The matching annotations are
    fetched, aggregated per image and per term (total area, count, ratio to
    the image total, mean area, list of annotations), written to
    ``stat-area.csv`` through ``write_csv`` and attached to the job.

    :param argv: Command-line arguments forwarded to ``CytomineJob.from_cli``
    """
    with CytomineJob.from_cli(argv) as cj:
        cj.job.update(progress=1, statusComment="Initialisation")
        cj.log(str(cj.parameters))

        params = cj.parameters
        id_project = params.cytomine_id_project

        term_ids = [int(term_id) for term_id in params.cytomine_id_terms.split(",")]
        wanted_term_ids = set(term_ids)
        terms = TermCollection().fetch_with_filter("project", id_project)
        terms = [term for term in terms if term.id in wanted_term_ids]

        image_ids = [int(image_id) for image_id in params.cytomine_id_images.split(",")]
        wanted_image_ids = set(image_ids)
        images = ImageInstanceCollection(light=True).fetch_with_filter("project", id_project)
        images = [image for image in images if image.id in wanted_image_ids]

        # Both filters are optional: a missing, None or empty parameter means
        # "no filter" rather than a crash on int("").
        users_param = getattr(params, "cytomine_id_users", None)
        if users_param:
            user_ids = [int(user_id) for user_id in users_param.split(",")]
        else:
            user_ids = []

        jobs_param = getattr(params, "cytomine_id_jobs", None)
        if jobs_param:
            wanted_job_ids = {int(job_id) for job_id in jobs_param.split(",")}
            jobs = JobCollection(project=id_project).fetch()
            jobs = [job for job in jobs if job.id in wanted_job_ids]
        else:
            jobs = []

        userjobs_ids = [job.userJob for job in jobs]
        all_user_ids = user_ids + userjobs_ids

        cj.job.update(progress=20, statusComment="Collect data")
        ac = AnnotationCollection()
        ac.terms = term_ids
        ac.images = image_ids
        ac.showMeta = True
        ac.showGIS = True
        ac.showTerm = True
        ac.reviewed = True if params.cytomine_reviewed_only else None
        ac.users = all_user_ids if all_user_ids else None
        ac.fetch()

        cj.job.update(progress=55, statusComment="Compute statistics")
        # Group once by image so the collection is not rescanned for every
        # image and every (image, term) pair.
        annotations_by_image = {}
        for a in ac:
            annotations_by_image.setdefault(a.image, []).append(a)

        data = dict()
        for image in images:
            image_annotations = annotations_by_image.get(image.id, [])
            areas = [a.area for a in image_annotations]
            total_area = np.sum(areas)
            d = {'total': total_area, 'count': len(areas), 'ratio': 1.0}
            for term in terms:
                annotations = [a for a in image_annotations if term.id in a.term]
                term_areas = [a.area for a in annotations]
                term_total = np.sum(term_areas)
                d[term.name] = {
                    'total': term_total,
                    'count': len(annotations),
                    'ratio': term_total / float(total_area) if total_area > 0 else 0,
                    # np.mean([]) yields nan with a RuntimeWarning; report 0.0
                    # for a term without annotations, like the ratio does.
                    'mean': np.mean(term_areas) if term_areas else 0.0,
                    'annotations': [{"created": a.created, "area": a.area} for a in annotations],
                }
            data[image.instanceFilename] = d

        cj.job.update(progress=90, statusComment="Write CSV report")
        with open("stat-area.csv", "w") as f:
            for l in write_csv(data, terms):
                f.write("{}\n".format(l))

        job_data = JobData(id_job=cj.job.id, key="Area CSV report", filename="stat-area.csv")
        job_data = job_data.save()
        job_data.upload("stat-area.csv")

        cj.job.update(statusComment="Finished.", progress=100)