def __init__(self,project_id,environment):
    """
    initialise the underlying AggregationAPI and register the text/image
    handlers used for transcription

    :param project_id: forwarded to AggregationAPI.__init__ and used as the
        key into /app/config/aggregation.yml when looking for a tag file
    :param environment: forwarded to AggregationAPI.__init__
    :raises IOError: if the yaml config file cannot be opened
    """
    AggregationAPI.__init__(self,project_id,environment)

    # the code to extract the relevant params from a text json file
    self.marking_params_per_shape["text"] = relevant_text_params
    # the code to cluster lines together
    self.default_clustering_algs["text"] = TextCluster
    self.default_clustering_algs["image"] = BlobClustering

    # todo - the reduction of a line segment (4d) into a 2d object can
    # probably be replaced with the standard one for line segments

    # load in the tag file - the handle is closed as soon as parsing is done
    # NOTE(review): yaml.load can construct arbitrary objects; that is only
    # acceptable because this is a locally deployed config file
    with open("/app/config/aggregation.yml","rb") as config_file:
        api_details = yaml.load(config_file)

    try:
        tag_file = api_details[self.project_id]["tags"]
    except (KeyError, TypeError):
        # no usable "tags" entry for this project - cluster without a tag file
        self.additional_clustering_args = {"text": {"reduction":text_line_reduction}}
    else:
        self.additional_clustering_args = {"text": {"reduction":text_line_reduction,"tag_file":tag_file}}

    self.ignore_versions = True
    self.instructions[121] = {}

    self.only_retired_subjects = False
    self.only_recent_subjects = True

    self.rollbar_token = None
def __init__(self):
    """
    initialise the API for the hard-coded "tate" project and register the
    text marking extractor plus a single entry for workflow 683
    """
    AggregationAPI.__init__(self, "tate")

    # replace whatever mapping the parent set up - only text is handled here
    self.marking_params_per_shape = {"text": text_mapping2}

    # workflow 683 is stored as a pair: an empty dict and a dict mapping
    # "init" to the list ["text"]
    workflow_entry = ({}, {"init": ["text"]})
    self.workflows[683] = workflow_entry
def __init__(self, project_id, environment, end_date=None):
    """
    initialise the underlying AggregationAPI, then refuse any project other
    than 245 or 376

    :param project_id: project identifier; must convert to the int 245 or 376
    :param environment: forwarded to AggregationAPI.__init__
    :param end_date: forwarded to AggregationAPI.__init__
    :raises ValueError: if project_id is not 245 or 376
    """
    AggregationAPI.__init__(self, project_id, environment, end_date=end_date)

    # guard against accidentally running transcription on other projects
    supported_projects = (245, 376)
    if int(project_id) not in supported_projects:
        raise ValueError('project_id must be either 245 or 376')
def __init__(self,project_id,csv_classification_file):
    """
    initialise against the "development" environment and load the
    classifications from a csv file

    :param project_id: forwarded to AggregationAPI.__init__
    :param csv_classification_file: passed straight to pandas.read_csv
    """
    AggregationAPI.__init__(self, project_id, "development")

    # the whole csv file is held in memory as a pandas dataframe
    frame = pandas.read_csv(csv_classification_file)
    self.classifications_dataframe = frame

    # derive a subject_id column from the subject_data field of each row
    subject_data = frame["subject_data"]
    frame["subject_id"] = subject_data.map(extract_subject_id)

    self.aggregation_results = {}
def aggregate(project_id, token, href, metadata, environment):
    """
    run the aggregation for a project, upload the compressed csv output and
    report completion

    :param project_id: passed to AggregationAPI
    :param token: passed to send_uploading / send_finished
    :param href: passed to send_uploading / send_finished
    :param metadata: passed to send_uploading / send_finished
    :param environment: passed to AggregationAPI as the environment keyword
    """
    project = AggregationAPI(project_id, environment=environment)
    project.__aggregate__()
    tarpath = project.__csv_output__(compress=True)

    try:
        response = send_uploading(metadata, token, href)
        # the upload target is the src of the first media entry in the reply
        url = response.json()["media"][0]["src"]
        with open(tarpath, 'rb') as tarball:
            requests.put(url, headers={'Content-Type': 'application/x-gzip'}, data=tarball)
    finally:
        # always clean up the tarball, even if the upload failed part way
        os.remove(tarpath)

    send_finished(metadata, token, href)
def __setup__(self):
    """
    do setup specifically for annotate and shakespeare's world. things like
    using a special classification algorithm (which is able to retire
    subjects) and text clustering algorithms specifically designed for
    annotate/shakespeare's world

    :return: None
    :raises ValueError: if self.project_id is neither 245 nor 376
    :raises IOError: if the yaml config file cannot be opened
    """
    AggregationAPI.__setup__(self)

    # list(...) rather than .keys()[0] so this also works where keys() is a view
    workflow_id = list(self.workflows)[0]

    # set the classification algorithm which will retire the subjects
    self.__set_classification_alg__(SubjectRetirement, self)

    self.instructions[workflow_id] = {}

    # set the function which will extract the relevant params for
    # processing transcription annotations
    self.marking_params_per_shape["text"] = helper_functions.relevant_text_params

    # set up the text clustering algorithm
    # todo - this might not be necessary anymore
    additional_text_args = {"reduction": helper_functions.text_line_reduction}

    # load in the config - the handle is closed as soon as parsing is done
    # NOTE(review): yaml.load can construct arbitrary objects; acceptable only
    # because this is a locally deployed config file
    with open("/app/config/aggregation.yml", "rb") as config_file:
        param_details = yaml.load(config_file)
    project_details = param_details[self.project_id]

    # the tag file is optional, email and project name are required
    if "tags" in project_details:
        additional_text_args["tags"] = project_details["tags"]

    self.email_recipients = project_details["email"]
    self.project_name = project_details["project_name"]

    # now that we have the additional text arguments, convert text_algorithm
    # from a class to an actual instance
    if self.project_id == 245:
        import annotate
        self.text_algorithm = annotate.AnnotateClustering(
            "text", self, additional_text_args)
        self.output_tool = AnnotateOutput(self)
    elif self.project_id == 376:
        import folger
        self.text_algorithm = folger.FolgerClustering(
            "text", self, additional_text_args)
        self.output_tool = ShakespearesWorldOutput(self)
    else:
        raise ValueError('project_id must be either 245 or 376')

    self.image_algorithm = RectangleClustering(
        "image", self, {"reduction": helper_functions.rectangle_reduction})

    self.only_retired_subjects = False
    self.only_recent_subjects = True
def __init__(self, project_id, csv_classification_file):
    """
    set up the API in the "development" environment and read the csv
    classification dump into a dataframe

    :param project_id: forwarded to AggregationAPI.__init__
    :param csv_classification_file: passed straight to pandas.read_csv
    """
    AggregationAPI.__init__(self, project_id, "development")

    # read in the csv file as a dataframe (pandas)
    classifications = pandas.read_csv(csv_classification_file)
    self.classifications_dataframe = classifications

    # one subject id per row, extracted from that row's subject_data value
    classifications["subject_id"] = classifications["subject_data"].map(
        extract_subject_id)

    self.aggregation_results = {}
def __setup__(self):
    """
    do setup specifically for annotate and shakespeare's world. things like
    using a special classification algorithm (which is able to retire
    subjects) and text clustering algorithms specifically designed for
    annotate/shakespeare's world

    :return: None
    :raises ValueError: if self.project_id is neither 245 nor 376
    :raises IOError: if the yaml config file cannot be opened
    """
    AggregationAPI.__setup__(self)

    # list(...) rather than .keys()[0] so this also works where keys() is a view
    workflow_id = list(self.workflows)[0]

    # set the classification algorithm which will retire the subjects
    self.__set_classification_alg__(SubjectRetirement,self)

    self.instructions[workflow_id] = {}

    # set the function which will extract the relevant params for
    # processing transcription annotations
    self.marking_params_per_shape["text"] = helper_functions.relevant_text_params

    # set up the text clustering algorithm
    # todo - this might not be necessary anymore
    additional_text_args = {"reduction":helper_functions.text_line_reduction}

    # load in the config - the handle is closed as soon as parsing is done
    # NOTE(review): yaml.load can construct arbitrary objects; acceptable only
    # because this is a locally deployed config file
    with open("/app/config/aggregation.yml","rb") as config_file:
        param_details = yaml.load(config_file)
    project_details = param_details[self.project_id]

    # the tag file is optional, email and project name are required
    if "tags" in project_details:
        additional_text_args["tags"] = project_details["tags"]

    self.email_recipients = project_details["email"]
    self.project_name = project_details["project_name"]

    # now that we have the additional text arguments, convert text_algorithm
    # from a class to an actual instance
    if self.project_id == 245:
        import annotate
        self.text_algorithm = annotate.AnnotateClustering("text",self,additional_text_args)
        self.output_tool = AnnotateOutput(self)
    elif self.project_id == 376:
        import folger
        self.text_algorithm = folger.FolgerClustering("text",self,additional_text_args)
        self.output_tool = ShakespearesWorldOutput(self)
    else:
        raise ValueError('project_id must be either 245 or 376')

    self.image_algorithm = RectangleClustering("image",self,{"reduction":helper_functions.rectangle_reduction})

    self.only_retired_subjects = False
    self.only_recent_subjects = True
def __init__(self):
    """
    initialise the API for the hard-coded project 245 and register the text
    extraction, clustering and reduction functions
    """
    AggregationAPI.__init__(self, 245)

    # start from a fresh mapping; the text_mapping2 entry is overwritten by
    # relevant_text_params straight away
    self.marking_params_per_shape = {"text": text_mapping2}
    self.marking_params_per_shape["text"] = relevant_text_params

    # text markings are reduced with text_line_reduction and clustered
    # with TextCluster
    reduction_algs = {"text": text_line_reduction}
    clustering_algs = {"text": TextCluster}
    self.__set_clustering_algs__(clustering_algs, reduction_algs)

    self.ignore_versions = True
    self.instructions[683] = {}
def __init__(self):
    """
    set up the API for the hard-coded project 245 with the text specific
    extraction, clustering and reduction functions
    """
    AggregationAPI.__init__(self, 245)

    # fresh mapping - the first value stored under "text" is replaced by
    # relevant_text_params on the following line
    self.marking_params_per_shape = {"text": text_mapping2}
    self.marking_params_per_shape["text"] = relevant_text_params

    # only the "text" shape gets a reduction and a clustering algorithm
    reduction_algs = {"text": text_line_reduction}
    cluster_classes = {"text": TextCluster}
    self.__set_clustering_algs__(cluster_classes, reduction_algs)

    self.ignore_versions = True
    self.instructions[683] = {}
def __init__(self):
    """
    set up the API for the hard-coded project 245: text extraction,
    clustering/reduction, and the subject retirement classification
    algorithm for workflow 121
    """
    AggregationAPI.__init__(self, 245)

    # fresh mapping - the first value stored under "text" is replaced by
    # relevant_text_params on the following line
    self.marking_params_per_shape = {"text": text_mapping2}
    self.marking_params_per_shape["text"] = relevant_text_params

    reduction_algs = {"text": text_line_reduction}
    cluster_classes = {"text": TextCluster}
    self.__set_clustering_algs__(cluster_classes, reduction_algs)

    # connection details handed to the retirement algorithm
    retirement_params = {
        "host": self.host_api,
        "project_id": self.project_id,
        "token": self.token,
        "workflow_id": 121,
    }
    self.__set_classification_alg__(SubjectRetirement, retirement_params)

    self.ignore_versions = True
    self.instructions[683] = {}
def __exit__(self, exc_type, exc_value, traceback):
    """
    report any errors via rollbar and shut down

    :param exc_type: exception class, or None if no exception was raised
    :param exc_value: exception instance, forwarded to the parent
    :param traceback: traceback object, forwarded to the parent
    :return: None
    """
    # only report real failures, and only when running in production
    if (exc_type is not None) and (self.environment == "production"):
        # the rollbar token lives in the yaml config; close the file as soon
        # as it has been parsed rather than leaking the handle
        with open("/app/config/aggregation.yml","rb") as panoptes_file:
            api_details = yaml.load(panoptes_file)

        rollbar_token = api_details[self.environment]["rollbar"]
        rollbar.init(rollbar_token,self.environment)
        rollbar.report_exc_info()

    # calling the parent
    AggregationAPI.__exit__(self, exc_type, exc_value, traceback)
def __exit__(self, exc_type, exc_value, traceback):
    """
    report any errors via rollbar and shut down

    :param exc_type: exception class, or None if no exception was raised
    :param exc_value: exception instance, forwarded to the parent
    :param traceback: traceback object, forwarded to the parent
    :return: None
    """
    # only report real failures, and only when running in production
    if (exc_type is not None) and (self.environment == "production"):
        # the rollbar token lives in the yaml config; close the file as soon
        # as it has been parsed rather than leaking the handle
        with open("/app/config/aggregation.yml", "rb") as panoptes_file:
            api_details = yaml.load(panoptes_file)

        rollbar_token = api_details[self.environment]["rollbar"]
        rollbar.init(rollbar_token, self.environment)
        rollbar.report_exc_info()

    # calling the parent
    AggregationAPI.__exit__(self, exc_type, exc_value, traceback)
def __setup__(self):
    """
    run the generic setup and then configure the retirement, text
    clustering and image clustering algorithms for project 245 or 376

    :return: None
    :raises ValueError: if self.project_id is neither 245 nor 376
    :raises IOError: if the yaml config file cannot be opened
    """
    AggregationAPI.__setup__(self)

    # list(...) rather than .keys()[0] so this also works where keys() is a view
    workflow_id = list(self.workflows)[0]

    # set the classification algorithm which will retire the subjects
    self.__set_classification_alg__(SubjectRetirement,self)

    self.instructions[workflow_id] = {}

    # set the function which will extract the relevant params for
    # processing transcription annotations
    self.marking_params_per_shape["text"] = helper_functions.relevant_text_params

    # set up the text clustering algorithm
    # todo - this might not be necessary anymore
    additional_text_args = {"reduction":helper_functions.text_line_reduction}

    # load in the tag file if there is one - the handle is closed as soon as
    # parsing is done
    # NOTE(review): yaml.load can construct arbitrary objects; acceptable only
    # because this is a locally deployed config file
    with open("/app/config/aggregation.yml","rb") as config_file:
        api_details = yaml.load(config_file)
    if "tags" in api_details[self.project_id]:
        additional_text_args["tags"] = api_details[self.project_id]["tags"]

    # now that we have the additional text arguments, convert text_algorithm
    # from a class to an actual instance
    if self.project_id == 245:
        import annotate
        self.text_algorithm = annotate.AnnotateClustering("text",self,additional_text_args)
    elif self.project_id == 376:
        import folger
        self.text_algorithm = folger.FolgerClustering("text",self,additional_text_args)
    else:
        # an explicit exception instead of "assert False", which is
        # silently stripped when python runs with -O
        raise ValueError('project_id must be either 245 or 376')

    self.image_algorithm = RectangleClustering("image",self,{})

    self.only_retired_subjects = False
    self.only_recent_subjects = True
def __init__(self):
    """
    initialise the API for the hard-coded project 245: text extraction,
    clustering/reduction, and the subject retirement classification
    algorithm for workflow 121
    """
    AggregationAPI.__init__(self, 245)

    # start from a fresh mapping; the text_mapping2 entry is overwritten by
    # relevant_text_params straight away
    self.marking_params_per_shape = {"text": text_mapping2}
    self.marking_params_per_shape["text"] = relevant_text_params

    reduction_algs = {"text": text_line_reduction}
    clustering_algs = {"text": TextCluster}
    self.__set_clustering_algs__(clustering_algs, reduction_algs)

    # everything the retirement algorithm is given to work with
    retirement_settings = {
        "host": self.host_api,
        "project_id": self.project_id,
        "token": self.token,
        "workflow_id": 121,
    }
    self.__set_classification_alg__(SubjectRetirement, retirement_settings)

    self.ignore_versions = True
    self.instructions[683] = {}
def __readin_tasks__(self,workflow_id):
    """
    return the hard-coded task layout for projects 245 and 376, deferring to
    the parent implementation for any other project

    :param workflow_id: only used when deferring to AggregationAPI
    :return: (classification_tasks, marking_tasks) for 245/376, otherwise
        whatever AggregationAPI.__readin_tasks__ returns
    """
    if self.project_id == 245:
        # todo - where is T1?
        return {"T3": True}, {"T2": ["text", "image"]}

    if self.project_id == 376:
        # text markings only, no classification tasks
        return {}, {"T2": ["text"]}

    return AggregationAPI.__readin_tasks__(self, workflow_id)
def __readin_tasks__(self, workflow_id):
    """
    return the hard-coded task layout for project 245 or 376

    :param workflow_id: only used for the parent call made for project 376
    :return: dict with a 'marking' and a 'classification' entry
    :raises ValueError: if self.project_id is neither 245 nor 376
    """
    if self.project_id == 245:
        # TODO - where is T1?
        return {
            'marking': {"T2": ["text", "image"]},
            'classification': {"T0": True, "T3": True},
        }

    if self.project_id == 376:
        layout = {
            'marking': {"T2": ["text"]},
            'classification': {"T0": True, "T3": True},
        }
        # the parent's view of the tasks is printed but otherwise unused
        print(AggregationAPI.__readin_tasks__(self, workflow_id))
        return layout

    raise ValueError('project_id must be either 245 or 376')
def __readin_tasks__(self, workflow_id):
    """
    give back the fixed task layout for project 245 or 376

    :param workflow_id: only used for the parent call made for project 376
    :return: dict with a 'marking' and a 'classification' entry
    :raises ValueError: if self.project_id is neither 245 nor 376
    """
    if self.project_id == 245:
        # TODO - where is T1?
        return {
            'marking': {"T2": ["text", "image"]},
            'classification': {"T0": True, "T3": True},
        }

    if self.project_id == 376:
        task_layout = {
            'marking': {"T2": ["text"]},
            'classification': {"T0": True, "T3": True},
        }
        # the parent's result is only printed, never merged into the layout
        print(AggregationAPI.__readin_tasks__(self, workflow_id))
        return task_layout

    raise ValueError('project_id must be either 245 or 376')
def __readin_tasks__(self,workflow_id):
    """
    return the hard-coded task layout for project 245 or 376

    :param workflow_id: only used for the parent call made for project 376
    :return: (classification_tasks, marking_tasks, {}) - the third element
        is always an empty dict
    :raises ValueError: if self.project_id is neither 245 nor 376
    """
    if self.project_id == 245:
        # todo - where is T1?
        marking = {"T2": ["text", "image"]}
        classification = {"T0": True, "T3": True}
        return classification, marking, {}

    if self.project_id == 376:
        marking = {"T2": ["text"]}
        classification = {"T0": True, "T3": True}
        # the parent's result is only printed, never returned
        print(AggregationAPI.__readin_tasks__(self, workflow_id))
        return classification, marking, {}

    raise ValueError('project_id must be either 245 or 376')
def aggregate(project_id, token, href, metadata, environment):
    """
    run the aggregation for a project, upload the compressed csv output and
    report completion

    :param project_id: passed to AggregationAPI
    :param token: passed to send_uploading / send_finished
    :param href: passed to send_uploading / send_finished
    :param metadata: passed to send_uploading / send_finished
    :param environment: passed to AggregationAPI as the environment keyword
    """
    # imported here rather than at module level, as in the original
    from aggregation_api import AggregationAPI

    with AggregationAPI(project_id, environment=environment) as project:
        project.__setup__()
        project.__aggregate__()

        with CsvOut(project) as writer:
            tarpath = writer.__write_out__(compress=True)
            try:
                response = send_uploading(metadata, token, href)
                # upload target is the src of the first media entry in the reply
                url = response.json()["media"][0]["src"]
                with open(tarpath, 'rb') as tarball:
                    requests.put(url, headers={'Content-Type': 'application/x-gzip'}, data=tarball)
            finally:
                # always clean up the tarball, even if the upload failed
                os.remove(tarpath)
            send_finished(metadata, token, href)
def __readin_tasks__(self,workflow_id):
    """
    return the hard-coded task layout for project 245 or 376

    :param workflow_id: only used for the parent call made for project 376
    :return: (classification_tasks, marking_tasks, {}) - the third element
        is always an empty dict
    :raises ValueError: if self.project_id is neither 245 nor 376
    """
    if self.project_id == 245:
        marking_tasks = {"T2":["text","image"]}
        # todo - where is T1?
        classification_tasks = {"T0":True,"T3" : True}

        return classification_tasks,marking_tasks,{}
    elif self.project_id == 376:
        marking_tasks = {"T2":["text"]}
        classification_tasks = {"T0":True,"T3":True}

        # the parent's result is only printed, never returned
        print(AggregationAPI.__readin_tasks__(self,workflow_id))

        return classification_tasks,marking_tasks,{}
    else:
        # an explicit exception instead of "assert False", which is
        # silently stripped when python runs with -O
        raise ValueError('project_id must be either 245 or 376')
__author__ = 'greg' import matplotlib.pyplot as plt from matplotlib.collections import PolyCollection import numpy as np import csv import json from aggregation_api import AggregationAPI import matplotlib.cbook as cbook import sys # subject_id = int(sys.argv[1]) # minimum_users = int(sys.argv[2]) subject_id = 511723 project = AggregationAPI(348, public_panoptes_connection=True) subject_image = project.__image_setup__(subject_id) for minimum_users in [8]: print minimum_users fig, ax = plt.subplots() image_file = cbook.get_sample_data(subject_image) image = plt.imread(image_file) # fig, ax = plt.subplots() im = ax.imshow(image) all_vertices = [] with open("/tmp/348/4_ComplexAMOS/vegetation_polygons_heatmap.csv",
__author__ = 'ggdhines' from aggregation_api import AggregationAPI import matplotlib.cbook as cbook import matplotlib.pyplot as plt from skimage.color import rgb2gray from sklearn.cluster import DBSCAN import numpy as np import math import cv2 # subject_id = 918463 subject_id = 917160 project = AggregationAPI(11,"development") fname = project.__image_setup__(subject_id) #478758 #917091 def convex_hull(points): """Computes the convex hull of a set of 2D points. Input: an iterable sequence of (x, y) pairs representing the points. Output: a list of vertices of the convex hull in counter-clockwise order, starting from the vertex with the lexicographically smallest coordinates. Implements Andrew's monotone chain algorithm. O(n log n) complexity. """ # Sort the points lexicographically (tuples are compared lexicographically). # Remove duplicates to detect the case we have just one unique point. points = sorted(list(set(points)))
s = (a + b + c) / 2.0 # Area of triangle by Heron's formula area = math.sqrt(s * (s - a) * (s - b) * (s - c)) circum_r = a * b * c / (4.0 * area) # Here's the radius filter. #print circum_r if circum_r < 1.0 / alpha: add_edge(edges, edge_points, coords, ia, ib) add_edge(edges, edge_points, coords, ib, ic) add_edge(edges, edge_points, coords, ic, ia) m = geometry.MultiLineString(edge_points) triangles = list(polygonize(m)) return cascaded_union(triangles), edge_points with AggregationAPI(11, "development") as whales: whales.__setup__() postgres_cursor = whales.postgres_session.cursor() select = "SELECT classification_subjects.subject_id,annotations from classifications INNER JOIN classification_subjects ON classification_subjects.classification_id = classifications.id where workflow_id = 84" postgres_cursor.execute(select) for subject_id, annotations in postgres_cursor.fetchall(): f_name = whales.__image_setup__(subject_id) image_file = cbook.get_sample_data(f_name[0]) image = plt.imread(image_file) fig, ax1 = plt.subplots(1, 1) ax1.imshow(image) plt.show()
#!/usr/bin/env python import sys sys.path.append("/home/greg/github/reduction/engine") sys.path.append("/home/ggdhines/PycharmProjects/reduction/engine") __author__ = 'greg' from aggregation_api import AggregationAPI import numpy workflow_id = 6 wildebeest = AggregationAPI(6) aggregations = wildebeest.__aggregate__(workflows = [6],store_values=False) marking_task = wildebeest.workflows[workflow_id][1].keys()[0] tools = wildebeest.workflows[workflow_id][1][marking_task] workflows,versions,instructions,updated_at_timestamps = wildebeest.__get_workflow_details__(workflow_id) tools_labels = instructions[workflow_id][marking_task]["tools"] for j,subject_id in enumerate(aggregations): overall_votes = {int(t_index): [] for t_index in range(len(tools))} for annotation in wildebeest.__get_raw_classifications__(subject_id,workflow_id): tool_votes = {int(t_index): 0 for t_index in range(len(tools))} for task in annotation: if task["task"] == marking_task: for marking in task["value"]: tool_votes[int(marking["tool"])] += 1 for t_index in tool_votes: overall_votes[t_index].append(tool_votes[t_index])
__author__ = 'greg' import matplotlib.pyplot as plt from matplotlib.collections import PolyCollection import numpy as np import csv import json from aggregation_api import AggregationAPI import matplotlib.cbook as cbook import sys # subject_id = int(sys.argv[1]) # minimum_users = int(sys.argv[2]) subject_id = 511723 project = AggregationAPI(348,public_panoptes_connection=True) subject_image = project.__image_setup__(subject_id) for minimum_users in [8]: print minimum_users fig, ax = plt.subplots() image_file = cbook.get_sample_data(subject_image) image = plt.imread(image_file) # fig, ax = plt.subplots() im = ax.imshow(image) all_vertices = [] with open("/tmp/348/4_ComplexAMOS/vegetation_polygons_heatmap.csv","rb") as f:
from __future__ import print_function import matplotlib matplotlib.use('WXAgg') from aggregation_api import AggregationAPI import matplotlib.pyplot as plt import matplotlib.cbook as cbook import cv2 import numpy as np import math from skimage.feature import blob_dog, blob_log, blob_doh butterflies = AggregationAPI(1150,"quasi") butterflies.__setup__() def closest(aim,points): distance = float("inf") best_point = None for p in points: d = math.sqrt((aim[0]-p[0])**2+(aim[1]-p[1])**2) if d < distance: distance = d best_point = p return tuple(best_point) # fname = butterflies.__image_setup__(1120709)[0] for subject_id in butterflies.__get_subjects_in_workflow__(874): fname = butterflies.__image_setup__(subject_id)[0] print(fname)
def __init__(self,project_id,environment,end_date=None):
    """
    initialise the underlying AggregationAPI, then refuse any project other
    than 245 or 376

    :param project_id: project identifier; must convert to the int 245 or 376
    :param environment: forwarded to AggregationAPI.__init__
    :param end_date: forwarded to AggregationAPI.__init__
    :raises ValueError: if project_id is not 245 or 376
    """
    AggregationAPI.__init__(self,project_id,environment,end_date=end_date)

    # just to stop me from using transcription on other projects - raised
    # explicitly because an assert is stripped when python runs with -O
    if int(project_id) not in [245,376]:
        raise ValueError('project_id must be either 245 or 376')
def __init__(self,project_id,environment,end_date=None):
    """
    set up the underlying AggregationAPI and then reject every project
    except 245 and 376

    :param project_id: project identifier; must convert to the int 245 or 376
    :param environment: forwarded to AggregationAPI.__init__
    :param end_date: forwarded to AggregationAPI.__init__
    :raises ValueError: if project_id is not 245 or 376
    """
    AggregationAPI.__init__(self, project_id, environment, end_date=end_date)

    # transcription is only meant to be run on these two projects
    allowed = (245, 376)
    if int(project_id) not in allowed:
        raise ValueError('project_id must be either 245 or 376')
def __setup__(self):
    """
    run the generic setup and then install SubjectRetirement as the
    classification algorithm, configured for workflow 121
    """
    AggregationAPI.__setup__(self)

    # connection details handed to the retirement algorithm
    retirement_params = {
        "host": self.host_api,
        "project_id": self.project_id,
        "token": self.token,
        "workflow_id": 121,
    }
    self.__set_classification_alg__(SubjectRetirement, retirement_params)
__author__ = 'ggdhines' import matplotlib matplotlib.use('WXAgg') import aggregation_api import cv2 import numpy as np import matplotlib.pyplot as plt from aggregation_api import AggregationAPI from sklearn.cluster import KMeans import matplotlib.cbook as cbook jungle = AggregationAPI(153,"development") # jungle.__migrate__() # jungle.__aggregate__() postgres_cursor = jungle.postgres_session.cursor() postgres_cursor.execute("select subject_ids,annotations from classifications where project_id = 153") markings = {} for subject_ids,annotations in postgres_cursor.fetchall(): if subject_ids == []: continue s = subject_ids[0] for task in annotations: if task["task"] == "T2": try: m = task["value"][0]["points"] if s not in markings: markings[s] = [m]
from __future__ import print_function import matplotlib matplotlib.use('WXAgg') from aggregation_api import AggregationAPI import matplotlib.pyplot as plt import matplotlib.cbook as cbook import cv2 import numpy as np butterflies = AggregationAPI(1150, "development") butterflies.__setup__() # fname = butterflies.__image_setup__(1120709)[0] fname = butterflies.__image_setup__(1500825)[0] image_file = cbook.get_sample_data(fname) image = plt.imread(image_file) res = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) # kernely = cv2.getStructuringElement(cv2.MORPH_RECT,(10,2)) # dy = cv2.Sobel(res,cv2.CV_16S,0,2) # dy = cv2.convertScaleAbs(dy) # cv2.normalize(dy,dy,0,255,cv2.NORM_MINMAX) # ret,close = cv2.threshold(dy,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU) # # t= image # _,contour, hier = cv2.findContours(close.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE) # for cnt in contour: # x,y,w,h = cv2.boundingRect(cnt) # if ((w/h)>5) and (w>130) and (w < 160):
__author__ = 'ggdhines' import matplotlib matplotlib.use('WXAgg') from aggregation_api import AggregationAPI import matplotlib.cbook as cbook import matplotlib.pyplot as plt import cv2 import numpy as np with AggregationAPI(592,"development") as sea: sea.__setup__() postgres_cursor = sea.postgres_session.cursor() select = "SELECT classification_subjects.subject_id,annotations from classifications INNER JOIN classification_subjects ON classification_subjects.classification_id = classifications.id where workflow_id = 607" postgres_cursor.execute(select) for subject_id in postgres_cursor.fetchall(): subject_id = subject_id[0] f_name = sea.__image_setup__(subject_id) image_file = cbook.get_sample_data(f_name[0]) image = plt.imread(image_file) gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) fig, ax1 = plt.subplots(1, 1) ax1.imshow(gray_image) plt.show() # (thresh, _) = cv2.threshold(gray_image, 250, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU) im_bw = cv2.threshold(gray_image, 180, 255, cv2.THRESH_BINARY)[1]