def classify(): """ Takes post data structured like this: { "data":["I like them apples", "I prefer green apples"] } """ classifiers_to_use = request.args.get('classifiers') use_new_classifier = request.args.get('new_cls') if classifiers_to_use: classifiers_to_use = classifiers_to_use.split(',') elif use_new_classifier: use_new_classifier = (use_new_classifier.lower() == 'true') post_payload = request.get_json(force=True) cl = Classification() data = post_payload['data'] predictions = list(cl.single_classification(tuple(data), to_json=True, classifiers_to_include=classifiers_to_use, use_new_classifier=use_new_classifier)) for text, preds in zip(data, predictions): preds['text'] = text response = {'count':len(predictions), 'data':predictions} return jsonify(response)
def __init__(self, environment, project):
    Classification.__init__(self, environment)
    self.project = project
    self.num_retired = None
    self.non_blanks_retired = None
    self.to_retire = None
def __init__(self, environment, project):
    Classification.__init__(self, environment)
    self.project = project
    self.num_retired = None
    self.non_blanks_retired = None
    # to know how often we should call Panoptes to get a new token
    # save on having to make unnecessary calls
    self.token_date = datetime.datetime.now()
    self.to_retire = set()
    self.total_retired = 0
def __init__(self, environment, param_dict):
    Classification.__init__(self, environment)
    assert isinstance(param_dict, dict)
    # to retire subjects, we need a connection to the host api, which hopefully is provided
    self.host_api = None
    self.project_id = None
    self.token = None
    self.workflow_id = None
    for key, value in param_dict.items():
        if key == "host":
            self.host_api = value
        elif key == "project_id":
            self.project_id = value
        elif key == "token":
            self.token = value
        elif key == "workflow_id":
            self.workflow_id = value
    assert (self.host_api is not None) and (self.project_id is not None) \
        and (self.token is not None) and (self.workflow_id is not None)
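# A sketch of constructing the parameter dict this initializer validates. Every value
# below is a placeholder and the subclass name is hypothetical -- only the four required
# keys (host, project_id, token, workflow_id) come from the assertions above:
params = {
    "host": "https://panoptes.example.org/api/",  # placeholder host API URL
    "project_id": 1234,                           # placeholder
    "token": "oauth-bearer-token",                # placeholder
    "workflow_id": 5678,                          # placeholder
}
retirement = RetirementClassification("production", params)  # hypothetical subclass name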
def test_cost_zeros(self):
    # Cost should be ln(2) ~= 0.693 when theta is zeroed, since the sigmoid
    # outputs 0.5 for every example
    theta_z = pd.Series(np.zeros(len(self.X.columns)))
    clsfy = Classification(self.X, self.y)
    cost = clsfy._cost(theta_z)
    self.assertAlmostEqual(cost, 0.693, places=3)
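# Why 0.693: with theta = 0 the sigmoid outputs 0.5 for every row, so the mean binary
# cross-entropy is ln 2 ~= 0.6931. A free-function sketch of the math _cost presumably
# implements (an assumption based on the test's expected value; in the class under test
# it is an instance method reading self.X and self.y):
import numpy as np

def _cost(theta, X, y):
    """Mean binary cross-entropy; at theta = 0, h = 0.5 everywhere and the cost is ln(2)."""
    h = 1.0 / (1.0 + np.exp(-X.dot(theta)))
    return -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))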
cols = ""  # ensure defined before the loop (may have been set earlier in the original file)
cols_list = []
while cols != "Y":
    cols = input()
    cols_list.append(cols)
data.scale_data(cols_list[:-1])
cols_list = data.get_cols()
y_column_name = input("enter y column name: ")
X_train, X_test, y_train, y_test = data.spilt_data(y_column_name)
model_type = input("Enter R for Regression and C for Classification: ")
if model_type == "C":
    print("Your options are: " + str(Classifier_list))  # add model list
    modelname = input("Enter model to be used: ")
    classifier = Classification(X_train, X_test, y_train, y_test, modelname)
    classifier.predict()
    classifier.accuracy()
    classifier.save_model()
elif model_type == 'R':
    print("Your options are: " + str(Regressor_list))  # add model list
    modelname = input("Enter model to be used, use A for all: ")
    if modelname == "A":
        for modelname in Regressor_list:
            regressor = Regression(X_train, X_test, y_train, y_test, modelname)
            regressor.predict()
            regressor.accuracy()
            regressor.save_model()
    else:
        regressor = Regression(X_train, X_test, y_train, y_test, modelname)
class Wales(WaterAlgorithm):
    def __init__(self, O_O_distance=2.8, O_H_distance=1.0, intermediate_saves=[],
                 folder="wales_21+", group_saves=[]):
        # NOTE: the original called WaterAlgorithm() here, which builds a throwaway
        # instance instead of initializing the base class; this is the likely intent.
        WaterAlgorithm.__init__(self)
        self.initialize(O_H_distance=O_H_distance, O_O_distance=O_O_distance,
                        intermediate_saves=intermediate_saves, group_saves=group_saves,
                        folder=folder, charge=1, do_symmetry_check=False,
                        order=[3, 14, 9, 2, 10, 19, 16, 15, 7, 18, 4, 0, 17, 1, 12, 6, 5, 8, 20, 11, 13])
        self.N = 21
        self.classification = Classification(self)

    def get_single_molecule_hydrogen_coordinates(self, site, water_orientation, i,
                                                 oxygen_positions, nearest_neighbors_nos,
                                                 nn_periodicity, nn_periodicity_axis, cell):
        bvv = get_bond_variable_values_from_water_orientation(water_orientation)
        if water_orientation > 9:
            result = np.zeros((3, 3))
        else:
            result = np.zeros((2, 3))
        index = 0
        #print nearest_neighbors_nos
        for n, x in enumerate(nearest_neighbors_nos):
            if bvv[n] == 1:
                # i == x means that this is a dangling bond
                if i == x:
                    com = oxygen_positions[3]
                    vector = oxygen_positions[i] - com
                    # normalize the vector
                    vector_length = scipy.linalg.norm(vector)
                    vector /= vector_length
                    # the dangling hydrogen is along this vector
                    result[index] = np.array(oxygen_positions[i] + self.O_H_distance * vector)
                else:
                    result[index] = np.array(
                        oxygen_positions[i]
                        - ((self.O_H_distance * (oxygen_positions[i] - oxygen_positions[x]))
                           / get_distance(oxygen_positions[i], oxygen_positions[x], False, None)))
                index += 1
        #print result
        return result

    def get_all_oxygen_coordinates(self):
        """result = np.array(
            [[ 0.000,  0.000,  0.000],
             [ 0.427, -0.000,  0.565],
             [ 0.188,  0.577,  0.795],
             [-0.491,  0.357,  0.795],
             [-0.491, -0.357,  0.795],
             [ 0.188, -0.577,  0.795],
             [ 0.982,  0.000,  0.188],
             [ 0.304,  0.934,  0.188],
             [-0.795,  0.577,  0.188],
             [-0.795, -0.577,  0.188],
             [ 0.304, -0.934,  0.188],
             [ 0.645,  0.447, -0.118],
             [-0.304,  0.934, -0.188],
             [-0.702,  0.000, -0.158],
             [-0.304, -0.934, -0.188],
             [ 0.795, -0.577, -0.188],
             [ 0.491,  0.357, -0.795],
             [-0.188,  0.577, -0.795],
             [-0.607,  0.000, -0.795],
             [-0.188, -0.577, -0.795],
             [ 0.491, -0.357, -0.795]]) * (self.O_O_distance / 0.713644)
        return result"""
        return read('optimal_wales.xyz').get_positions()

    def additional_requirements_met(self, water_orientation, water_orient, molecule_no):
        wo = water_orient.copy()
        if wo[molecule_no] != -1:
            return False
        wo[molecule_no] = water_orientation
        res, counts = self.classification.get_bond_types(wo)
        if counts[10][1] > 0:
            print water_orient
            print counts
            raw_input()
        # Check the number of AAD-AAD and ADD-ADD bonds
        if (counts[4][0] + counts[8][0] > 2 or counts[3][0] > 1 or counts[10][1] > 0
                or counts[11][1] > 0 or counts[12][1] > 0 or counts[13][1] > 0):
            #print "----------------"
            #print counts[4][0]+counts[8][0]
            #self.view_result(wo)
            #raw_input()
            return False
        else:
            return True
def test_sigmoid_scalar(self):
    # for very negative numbers, sigmoid should equal zero; for very large, it should be 1
    self.assertAlmostEqual(Classification._sigmoid(-1000000), 0)
    self.assertAlmostEqual(Classification._sigmoid(1000000), 1)
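# These tests only pass if _sigmoid is numerically stable: a naive 1/(1 + math.exp(-x))
# raises OverflowError at x = -1000000, and the NumPy equivalent emits overflow warnings.
# A plausible stable implementation consistent with both the scalar and vector tests
# (an assumption -- the real method is not shown in this corpus; the class name is
# reused purely for illustration):
import numpy as np

class Classification:
    @staticmethod
    def _sigmoid(z):
        """Numerically stable logistic function; accepts scalars or numpy arrays."""
        z = np.asarray(z, dtype=float)
        out = np.empty_like(z)
        pos = z >= 0
        out[pos] = 1.0 / (1.0 + np.exp(-z[pos]))
        expz = np.exp(z[~pos])  # z < 0 here, so exp() cannot overflow
        out[~pos] = expz / (1.0 + expz)
        return out if out.ndim else float(out)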
"map_code", "ambito", "population_total", "population_male", "population_female", "dwellings_occupied", ] df = df[["state_code", "state_name", "municipality_code", "municipality_name", "locality_code", "locality_name"]] df.state_code = df.state_code.astype(str).str.zfill(2) df.municipality_code = df.municipality_code.astype(str).str.zfill(3) df.locality_code = df.locality_code.astype(str).str.zfill(4) df.municipality_code = df.state_code + df.municipality_code df.locality_code = df.municipality_code + df.locality_code df.state_name = df.state_name.str.title() df.municipality_name = df.municipality_name.str.title() df.locality_name = df.locality_name.str.title() h = Hierarchy(["state", "municipality", "locality"]) parent_code_table = repeated_table_to_parent_id_table(df, h) parent_id_table = parent_code_table_to_parent_id_table(parent_code_table, h) c = Classification(parent_id_table, h) c.to_csv("out/locations_mexico_inegi.csv") c.to_stata("out/locations_mexico_inegi.dta")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 64

# image size 3, 32, 32
# batch size must be an even number
# shuffle must be True
cifar_10_train_dt = CIFAR10(r'data', download=False, transform=ToTensor())
#dev = Subset(cifar_10_train_dt, range(128))
cifar_10_train_l = DataLoader(cifar_10_train_dt,
                              batch_size=batch_size,
                              shuffle=False,
                              pin_memory=torch.cuda.is_available())

encoder = models.Encoder()
classification = Classification().to(device)

root = Path(r'modified/models')
model_path = root / Path(r'encoder500.wgt')
encoder.load_state_dict(torch.load(str(model_path)))
encoder.to(device)

classification_optim = Adam(classification.parameters(), lr=1e-4)
epoch_restart = 50
root_classification_model = Path(r'classification_model_baseline_modified')

if epoch_restart > 0 and root is not None:
    classification_loss_file = root_classification_model / Path(
        'classification_loss' + str(epoch_restart) + '.wgt')
    classification.load_state_dict(torch.load(str(classification_loss_file)))
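# The snippet above only restores checkpoints. A minimal sketch of the training loop
# that would plausibly follow, assuming the loaded encoder is kept frozen and feeds the
# classification head, with CIFAR-10 labels as targets (the loss choice, epoch count,
# and the decision to freeze the encoder are assumptions):
import torch
import torch.nn.functional as F

epochs = 10  # assumption
encoder.eval()  # encoder weights came from a checkpoint; keep them frozen
for epoch in range(epoch_restart + 1, epoch_restart + 1 + epochs):
    for images, labels in cifar_10_train_l:
        images, labels = images.to(device), labels.to(device)
        with torch.no_grad():
            features = encoder(images)      # assumed: encoder output feeds the head
        logits = classification(features)
        loss = F.cross_entropy(logits, labels)
        classification_optim.zero_grad()
        loss.backward()
        classification_optim.step()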
def trainNestedCV(direct, subject, session, filename, hyp_params, parameters):
    subj = load_subject(direct, subject, 1, filename)["subject"]
    # data = subj.data3D.astype(np.float32)  # convert data to 3d for deep learning
    # labels = subj.labels.astype(np.int64)
    # labels[:] = [x - 1 for x in labels]
    data, labels = format_data('words', subject, 4096)

    import random  # just for testing
    labels = []  # just for testing
    for i in range(200):  # just for testing
        labels.append(random.randint(0, 3))  # just for testing
    labels = np.array(labels).astype(np.int64)
    data = data[:200, :, 0:750]

    unique = np.unique(labels, return_counts=False)
    data_params = dict(n_classes=len(unique), n_chans=6, input_time_length=subj.epoch)
    # n_chans = subj.n_chans
    # w = windows(data, subj, 500, 250, 500)  # fs = subj.sfreq  # list of windows

    num_folds = 2
    # shuffle=False keeps trial order to preserve structure; random_state removed since
    # it has no effect without shuffling (and newer scikit-learn rejects the combination)
    skf = StratifiedKFold(n_splits=num_folds, shuffle=False)
    trainsetlist, testsetlist = [], []
    inner_fold_acc, inner_fold_loss, inner_fold_CE = [], [], []

    subj_results = Results(subject, filename, num_folds)  # , class_names=["apple", "orange", "car", "bus"]
    subj_results.change_directory(direct)
    subj_results.get_acc_loss_df(hyp_params, 'Fold')  # empty dataframe headed with each HP set

    clf = Classification(hyp_params, parameters, data_params, "01", "shallow", "words")  # classifier object

    print(f"Inner-fold training for Subject {subject} in progress...")
    for inner_ind, outer_index in skf.split(data, labels):
        inner_fold, outer_fold = data[inner_ind], data[outer_index]
        inner_labels, outer_labels = labels[inner_ind], labels[outer_index]
        subj_results.concat_y_true(outer_labels)
        trainsetlist.append(SignalAndTarget(inner_fold, inner_labels))  # used for outer-fold train/test
        testsetlist.append(SignalAndTarget(outer_fold, outer_labels))
        for train_idx, valid_idx in skf.split(inner_fold, inner_labels):
            X_Train, X_val = inner_fold[train_idx], inner_fold[valid_idx]
            y_train, y_val = inner_labels[train_idx], inner_labels[valid_idx]
            train_set = SignalAndTarget(X_Train, y_train)
            val_set = SignalAndTarget(X_val, y_val)
            hyp_param_acc, hyp_param_loss, hyp_param_CE = clf.train_inner(
                train_set, val_set, None, False)
            inner_fold_loss.append(hyp_param_loss)
            inner_fold_acc.append(hyp_param_acc)
            inner_fold_CE.append(hyp_param_CE)

    subj_results.fill_acc_loss_df(inner_fold_acc, inner_fold_loss, inner_fold_CE)
    subj_results.get_hp_means(hyp_params, "accuracy")  # needed to select inter-subject parameters
    subj_results.get_best_params("accuracy")
    clf.best_params = subj_results.best_params
    clf.set_best_params()
    print(f"Best parameters selected: {clf.best_params}")
    print("///////-------------------------------------------------------///////")
    print(f"Outer-fold training and testing for Subject {subject} in progress...")

    # accuracy score for each fold, combined predictions for each fold
    scores, fold_models, predictions, probabilities, outer_cross_entropy = clf.train_outer(
        trainsetlist, testsetlist, False)
    subj_results.outer_fold_accuracies = scores
    subj_results.y_pred = np.array(predictions)
    subj_results.y_probs = np.array(probabilities)
    subj_results.outer_fold_cross_entropies = outer_cross_entropy
    (subj_results.train_loss, subj_results.valid_loss, subj_results.test_loss,
     subj_results.train_acc, subj_results.valid_acc,
     subj_results.test_acc) = get_model_loss_and_acc(fold_models)
    subj_results.save_result()
    subj_results.subject_stats()
    print("")
    print(subj_results.subject_stats_df.head())
four_digit["parent_code"] = four_digit.code.apply(lambda x: x[:2]) four_digit = four_digit.drop("community", axis=1) four_digit["level"] = "4digit" two_digit = hs4.iloc[1241:1339] two_digit["code"] = two_digit.code.astype(str).str.zfill(2) two_digit = two_digit.rename(columns={"community": "parent_code"}) two_digit["parent_code"] = two_digit.parent_code.astype(str).str.zfill(3) two_digit["level"] = "2digit" section = hs4.iloc[1339:].drop("community", axis=1) section["code"] = section.code.astype(str).str.zfill(3) section["parent_code"] = None section["level"] = "section" hs_clean = pd.concat([section, two_digit, four_digit]) hs_clean = hs_clean.reset_index(drop=True) h = Hierarchy(["section", "2digit", "4digit"]) hs_clean = parent_code_table_to_parent_id_table(hs_clean, h) c = Classification(hs_clean, h) #community = pd.read_table("in/hs4_community.tsv", encoding="utf-8") #hs4 = hs4.merge(community, left_on="community", right_on="code", how="inner") # weird bug where pandas infer_type was returning mixed instead of string c.table.code = c.table.code.astype(str) c.to_csv("out/hs92_atlas.csv") c.to_stata("out/hs92_atlas.dta")
from classification import Classification
from pprint import pprint

client_id = 'id'
client_secret = 'secret'

c = Classification(client_id, client_secret)
pprint(c.find_student_classification('MI-PYT', 'laskobor'))
df.level = df.level.astype("category", categories=h, ordered=True)
df = df.sort_values(by=["level", "code"])
df.level = df.level.astype(str)
df = df.reset_index(drop=True)

parent_id_table = parent_code_table_to_parent_id_table(df, h)

# TODO: This isn't the official classification level name but this makes
# compatibility between colombia and mexico way easier
# parent_code_table.loc[parent_code_table.level == "state", "level"] = "department"

# Drop the "locality" level since we don't use it
# parent_code_table = parent_code_table[parent_code_table.level != "locality"]

parent_id_table = parent_id_table[[
    "code",
    "name",
    "level",
    "name_es",
    "name_en",
    "name_short_es",
    "name_short_en",
    "parent_id",
]]

c = Classification(parent_id_table, h)
c.to_csv("out/locations_peru_inei.csv")
c.to_stata("out/locations_peru_inei.dta")
class Pipeline(object):
    def __init__(self, method=None, data_fold=None, full_dataset=True,
                 metal_groupThres=0.1, thatch_groupThres=0.1,
                 groupBounds=False, erosion=0, suppress=None, pickle_viola=None,
                 # single_detector=True,
                 in_path=None, out_path=None, neural=None, ensemble=None,
                 detector_params=None, pipe=None, out_folder_name=None,
                 net_threshold=0.5):
        '''
        Parameters:
        ------------------
        groupThres: bool
            Decides if we should do grouping on neural detections
        method: string
            Can be either 'viola' or 'slide'
        '''
        assert method == 'viola' or method == 'slide'
        self.method = method
        self.full_dataset = full_dataset
        self.data_fold = data_fold
        self.groupThres = dict()
        # NOTE: the original assigned metal_groupThres to 'thatch' and vice versa;
        # with both defaults at 0.1 the swap was invisible, but this is the likely intent.
        self.groupThres['thatch'] = float(thatch_groupThres)
        self.groupThres['metal'] = float(metal_groupThres)
        self.groupBounds = groupBounds
        self.erosion = erosion
        #self.single_detector = single_detector
        self.in_path = in_path
        if DEBUG:
            self.img_names = [img_name for img_name in os.listdir(self.in_path)
                              if img_name.endswith('jpg')][:1]
        else:
            self.img_names = [img_name for img_name in os.listdir(self.in_path)
                              if img_name.endswith('jpg')]
        self.out_path = out_path

        # Setup Viola: if we are given an evaluation directly, don't bother running viola
        if self.method == 'viola':
            self.pickle_viola = pickle_viola
            if self.pickle_viola is None:
                self.viola = ViolaDetector(pipeline=True, out_path=out_path,
                                           in_path=in_path,
                                           folder_name=out_folder_name,
                                           save_imgs=True, **detector_params)
            else:
                with open(pickle_viola, 'rb') as f:
                    self.viola_evaluation = pickle.load(f)
                    self.viola_evaluation.in_path = self.in_path
                    self.viola_evaluation.out_path = self.out_path
        # Setup the sliding window
        elif self.method == 'slide':
            self.slider = SlidingWindowNeural(full_dataset=self.full_dataset,
                                              in_path=self.in_path,
                                              out_path=self.out_path,
                                              **detector_params)
        else:
            raise ValueError('Need to specify either viola or sliding window')
        self.ensemble = ensemble

        # EVALUATION OBJECTS
        self.auc_thresholds = [.5]
        self.detections_after_neural = list()
        self.evaluation_after_neural = list()
        detector_names = detector_params['detector_names'] if self.method == 'viola' else None
        for thres in self.auc_thresholds:
            detections = Detections()
            self.detections_after_neural.append(detections)
            self.evaluation_after_neural.append(
                Evaluation(detections=detections, method='pipeline',
                           save_imgs=True, out_path=self.out_path,
                           auc_threshold=thres, folder_name=out_folder_name,
                           in_path=self.in_path, detector_names=detector_names))

        self.auc = AucCurve(self.img_names,
                            self.evaluation_after_neural[0].correct_roofs,
                            self.out_path, self.method)
        if self.data_fold == utils.TESTING:
            self.classification = Classification(self.img_names, self.out_path,
                                                 self.evaluation_after_neural[0].correct_roofs,
                                                 self.method)
        print self.img_names
        self.neural_time = defaultdict(int)
        self.viola_time = defaultdict(int)

    def run(self, img_type='inhabited', img_names=None, in_path=None):
        '''
        1. Find proposals using ViolaJones or sliding window
        2. Resize the window and classify it
        3. Net returns a list of the roof coordinates of each type - saved in roof_coords
        '''
        img_names = img_names if img_names is not None else self.img_names
        in_path = in_path if in_path is not None else self.in_path
        for i, img_name in enumerate(img_names):
            print '***************** Image {0}: {1}/{2} *****************'.format(
                img_name, i, len(img_names) - 1)

            # VIOLA: currently it does no scoring, we commented out in viola_detector.py
            rect_detections = dict()
            if self.method == 'viola':
                if self.pickle_viola is None:
                    img = self.viola.detect_roofs(img_name=img_name, in_path=in_path)
                    current_viola_detections = self.viola.viola_detections
                    self.viola_time[img_type] = self.viola.evaluation.detections.total_time
                else:
                    # use the pickled detections for speed in testing the neural network
                    current_viola_detections = self.viola_evaluation.detections
                    self.viola_time[img_type] = self.viola_evaluation.detections.total_time
                proposal_patches, proposal_coords, img_shape = self.find_viola_proposals(
                    current_viola_detections, img_name=img_name, in_path=in_path)
                for roof_type in utils.ROOF_TYPES:
                    if len(proposal_coords[roof_type]) > 0:
                        rect_detections[roof_type] = utils.polygons2boxes(proposal_coords[roof_type])
                    else:
                        rect_detections[roof_type] = np.array([])

            # SLIDING WINDOW: also does no scoring
            elif self.method == 'slide':
                with Timer() as t:
                    # get the roofs with sliding detector
                    proposal_coords, rect_detections = self.slider.get_windows(img_name,
                                                                               in_path=in_path)
                    # convert them to patches
                    proposal_patches, img_shape = self.find_slider_proposals(
                        rect_detections, img_name=img_name, in_path=in_path)
                print 'Sliding window detection for one image took {} seconds'.format(t.secs)
            else:
                print 'Unknown detection method {}'.format(self.method)
                sys.exit(-1)

            if in_path == self.in_path:
                self.print_detections(rect_detections, img_name, '_viola')

            # NEURALNET
            print 'Starting neural classification of image {}'.format(img_name)
            with Timer() as t:
                # NOTE: classified detections only has roofs with prob >= 0.5
                classified_detections, probs = self.neural_classification_AUC(
                    proposal_patches, rect_detections)
            print 'Classification took {} secs'.format(t.secs)
            self.neural_time[img_type] += t.secs

            # GROUPING
            rect_detections, probs, grouping_time = self.nonmax_suppression(rect_detections, probs)
            self.neural_time[img_type] += grouping_time

            # PRINTING DETECTIONS
            if in_path == self.in_path:
                self.print_detections({'metal': classified_detections['metal'][0],
                                       'thatch': classified_detections['thatch'][0]},
                                      img_name, '_neural')
            det = dict()
            for roof_type in utils.ROOF_TYPES:
                det[roof_type] = rect_detections[roof_type][probs[roof_type] > 0.5]
            if in_path == self.in_path:
                self.print_detections(det, img_name, '_grouped')

            # AUC AND CLASSIFICATION USING THE GROUPED DETECTIONS
            # only do AUC with the inhabited images
            if in_path == self.in_path:
                self.auc.set_detections(rect_detections, img_name)
                self.auc.set_probs(probs, img_name)
            # only do classification if we are using the testing set
            if self.data_fold == utils.TESTING:
                self.classification.set_detections(rect_detections, img_name)
                self.classification.set_probs(probs, img_name)

    def print_detections(self, detections, img_name, title):
        if detections is not None:
            for roof_type, detects in detections.iteritems():
                img = cv2.imread(self.in_path + img_name)
                if img_name in self.evaluation_after_neural[0].correct_roofs[roof_type]:
                    # the uninhabited images do not have an entry
                    utils.draw_detections(
                        self.evaluation_after_neural[0].correct_roofs[roof_type][img_name],
                        img, rects=True, color=(0, 255, 0), thickness=6)
                if detects.shape[0] > 0:
                    utils.draw_detections(detects, img, rects=True,
                                          color=(255, 0, 0), thickness=3)
                cv2.imwrite('debug/{}_{}_{}{}.jpg'.format(self.groupThres[roof_type],
                                                          img_name[:-4], roof_type, title),
                            img)

    def nonmax_suppression(self, rect_detections, probs):
        with Timer() as t:
            # set detections and score
            for roof_type in utils.ROOF_TYPES:
                # proper non max suppression from Felzenszwalb et al.
                if len(rect_detections[roof_type]) > 0:
                    rect_detections[roof_type], probs[roof_type] = suppression.non_max_suppression(
                        rect_detections[roof_type], probs[roof_type],
                        overlapThres=self.groupThres[roof_type])
        print 'Grouping took {} seconds'.format(t.secs)
        return rect_detections, probs, t.secs

    def get_correct_class_per_detection(self, rect_detections, img_name):
        # this is needed to build the Recall precision curve
        # get the best class guess of the detections by scoring it with ground truth
        self.slider.detections.set_detections(roof_type='thatch',
                                              detection_list=rect_detections['thatch'],
                                              img_name=img_name)
        self.slider.detections.set_detections(roof_type='metal',
                                              detection_list=rect_detections['metal'],
                                              img_name=img_name)
        # score the image; since we use fast scoring, we don't need the img_shape
        self.slider.evaluation.score_img(img_name=img_name, img_shape=(-1, -1),
                                         fast_scoring=True)
        # get the proper class by looking at the best score for each detection
        correct_classes = dict()
        for roof_type in utils.ROOF_TYPES:
            correct_classes[roof_type] = np.zeros((len(rect_detections[roof_type])))
            for d, (detection, score) in enumerate(
                    self.slider.detections.best_score_per_detection[img_name][roof_type]):
                correct_classes[roof_type][d] = 0 if score < 0.5 else 1
            correct_classes[roof_type] = list(correct_classes[roof_type])
        return correct_classes

    def group_min_bound(self, polygons, img_shape, erosion=0):
        '''
        Attempt at finding the minbound of all overlapping rects and merging them
        to a single detection. This unfortunately will merge nearby roofs.
        '''
        bitmap = np.zeros(img_shape, dtype='uint8')
        utils.draw_detections(np.array(polygons), bitmap, fill=True, color=1)
        if erosion > 0:
            kernel = np.ones((5, 5), np.uint8)
            bitmap = cv2.erode(bitmap, kernel, iterations=erosion)
        # get contours
        contours, hierarchy = cv2.findContours(bitmap, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        # get the min bounding rect for the rects
        min_area_conts = [np.int0(cv2.cv.BoxPoints(cv2.minAreaRect(cnt))) for cnt in contours]
        return min_area_conts

    def find_viola_proposals(self, viola_detections, img_name=None, in_path=None):
        '''Call viola to find coordinates of candidate roofs. Extract those patches
        from the image, transform them so they can be fed to the neural network.
        Return both the coordinates and the patches.
        '''
        in_path = self.in_path if in_path is None else in_path
        try:
            img_full = cv2.imread(in_path + img_name, flags=cv2.IMREAD_COLOR)
            img_shape = img_full.shape
        except IOError as e:
            print e
            sys.exit(-1)
        all_proposal_patches = dict()
        all_proposal_coords = dict()
        # extract patches for neural network classification
        for roof_type in ['metal', 'thatch']:
            all_proposal_coords[roof_type] = viola_detections.get_detections(
                img_name=img_name, roof_type=roof_type)
            #all_proposal_coords[roof_type] = self.viola.viola_detections.get_detections(img_name=img_name, roof_type=roof_type)
            patches = np.empty((len(all_proposal_coords[roof_type]), 3,
                                utils.PATCH_W, utils.PATCH_H))
            for i, detection in enumerate(all_proposal_coords[roof_type]):
                # extract the patch from the image using utils code
                img = utils.four_point_transform(img_full, detection)
                # transform the patch using utils code
                patch = utils.cv2_to_neural(img)
                patches[i, :, :, :] = patch
            all_proposal_patches[roof_type] = patches
        return all_proposal_patches, all_proposal_coords, img_shape

    def find_slider_proposals(self, slider_rects, img_name=None, in_path=None):
        # rects are in the form of (x, y, w, h)
        in_path = self.in_path if in_path is None else in_path
        try:
            img_full = cv2.imread(in_path + img_name, flags=cv2.IMREAD_COLOR)
            img_shape = img_full.shape
        except IOError as e:
            print e
            sys.exit(-1)
        all_proposal_patches = dict()
        # extract patches for neural network classification
        for roof_type in ['metal', 'thatch']:
            patches = np.empty((len(slider_rects[roof_type]), 3,
                                utils.PATCH_W, utils.PATCH_H))
            for i, rect in enumerate(slider_rects[roof_type]):
                # extract the patch from the image using utils code
                img = img_full[rect.ymin:rect.ymax, rect.xmin:rect.xmax, :]
                # transform the patch using utils code
                patch = utils.cv2_to_neural(img)
                patches[i, :, :, :] = patch
            all_proposal_patches[roof_type] = patches
        return all_proposal_patches, img_shape

    def process_viola(self, rows, cols, img_path=None, verbose=False):
        # Find candidate roof contours using Viola for all types of roof
        # returns list with as many lists of detections as the detectors we have passed
        self.viola.detect_roofs(img_name=self.img_name,
                                img_path=self.test_img_path + self.img_name)
        print 'Detected {0} candidate roofs'.format(
            len(self.viola.roofs_detected[self.img_name]))
        if verbose:
            self.viola.mark_detections_on_img(img=self.image, img_name=self.img_name)
        # get the mask and the contours for the detections
        detection_mask, _ = self.viola.get_patch_mask(img_name=self.img_name,
                                                      rows=rows, cols=cols)
        patch_location = self.out_path + self.img_name + '_mask.jpg'
        misc.imsave(patch_location, detection_mask)
        self.all_contours[self.img_name] = self.viola.get_detection_contours(
            patch_location, self.img_name)

    def neural_classification(self, proposal_patches, proposal_coords):
        classified_detections = defaultdict(list)
        for roof_type in utils.ROOF_TYPES:
            # classify with neural network
            if proposal_patches[roof_type].shape[0] > 1:
                if self.single_detector:
                    # we have a single net
                    classes = np.array(self.net.test(proposal_patches[roof_type]))
                    # filter according to classification
                    for detection, classification in zip(proposal_coords[roof_type], classes):
                        if classification == utils.NON_ROOF:
                            classified_detections['background'].append(detection)
                        elif classification == utils.METAL:
                            classified_detections['metal'].append(detection)
                        elif classification == utils.THATCH:
                            classified_detections['thatch'].append(detection)
                else:
                    # we have one net per roof type
                    specific_net = self.net[roof_type]
                    classes = specific_net.test(proposal_patches[roof_type])
                    # filter according to classification
                    for detection, classification in zip(proposal_coords[roof_type], classes):
                        if classification == 0:
                            classified_detections['background'].append(detection)
                        elif classification == 1:
                            classified_detections[roof_type].append(detection)
                        else:
                            raise ValueError('Unknown classification of patch')
            else:
                print 'No {0} detections'.format(roof_type)
        return classified_detections

    def neural_classification_AUC(self, proposal_patches, proposal_coords):
        # get the classification by evaluating it compared to the real roofs
        # get the probability of it being that type of roof
        classified_detections = dict()
        probs = dict()
        for roof_type in utils.ROOF_TYPES:
            classified_detections[roof_type] = list()
            if proposal_patches[roof_type].shape[0] > 1:
                probs[roof_type] = self.ensemble.predict_proba(
                    proposal_patches[roof_type], roof_type=roof_type)
                # different detections depending on threshold
                coords = np.array(proposal_coords[roof_type])
                for thres in self.auc_thresholds:
                    detections_logical = probs[roof_type] >= thres
                    classified_detections[roof_type].append(coords[detections_logical])
            else:
                print 'No {0} detections'.format(roof_type)
                for thres in self.auc_thresholds:
                    classified_detections[roof_type].append(np.array([]))
                probs[roof_type] = np.array([])
        return classified_detections, probs

    def save_img_detections(self, img_name, proposal_coords, predictions, in_path=None):
        raise ValueError('Incorrect method')
        in_path = self.in_path if in_path is None else in_path
        img = cv2.imread(self.in_path + img_name)
        roofs = DataLoader().get_roofs(in_path + img_name[:-3] + 'xml', img_name)
        for roof in roofs:
            cv2.rectangle(img, (roof.xmin, roof.ymin),
                          (roof.xmin + roof.width, roof.ymin + roof.height), (0, 255, 0), 2)
        for (x, y, w, h), accept in zip(proposal_coords['metal'],
                                        predictions[img_name]['metal']):
            color = (0, 0, 0) if accept == 1 else (0, 0, 255)
            cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        cv2.imwrite(self.out_path + img_name, img)
h = Hierarchy(["department", "municipality", "population_center"]) df = df.rename( columns={ "department_name": "name_department", "municipality_name": "name_municipality", "population_center_name": "name_population_center", }) parent_code_table = repeated_table_to_parent_id_table( df, h, level_fields={ "department": ["name_department"], "municipality": ["name_municipality"], "population_center": ["name_population_center"], }, ) parent_id_table = parent_code_table_to_parent_id_table( parent_code_table, h) # Reorder columns to keep diff clean parent_id_table = parent_id_table.ix[:, [ "code", "name", "level", "parent_id" ]] c = Classification(parent_id_table, h) c.to_csv("out/locations_colombia_dane.csv") c.to_stata("out/locations_colombia_dane.dta")
def __init__(self, project, clustering_alg=None):
    Classification.__init__(self, project, clustering_alg)
# -*- coding:utf-8 -*-
# AUTHOR: yaolili
# FILE: runClassification.py
# ROLE: run classifier in Classification and get the result of prediction
# CREATED: 2015-12-15 09:28:02
# MODIFIED: 2015-12-15 09:28:03

import sys
import os

from classification import Classification

if __name__ == "__main__":
    if len(sys.argv) < 5:
        print "sys.argv[1]: classifier"
        print "sys.argv[2]: trainFile"
        print "sys.argv[3]: devFile"
        print "sys.argv[4]: outputFile"
        exit()
    cfInstance = Classification(sys.argv[1], sys.argv[2], sys.argv[3])
    cfInstance.getPreResult(sys.argv[4])
if len(sys.argv) > 1:
    print 'Parsing...',
    sys.stdout.flush()
    p = Parse(sys.argv[1])
    p.compute_fqdn()
    print 'DONE'

    print 'Computing features (Can take some time because of whois queries)...',
    sys.stdout.flush()
    features = Features(p)
    features.compute()
    print 'DONE'

    print 'Classification...',
    sys.stdout.flush()
    classification = Classification(features, p)
    classification.compute()
    print 'DONE'

    print 'Launching webserver...',
    sys.stdout.flush()
    flask_app = Flask('caphaw-dns-classifier')
    print 'DONE'

    @flask_app.route('/')
    def index():
        return render_template('index.html',
                               X=features.X,
                               X_scaled=features.X_scaled,
                               features_list=[features.features_list] * len(features.X_scaled),
                               all=sorted(classification.all),
        scoreRR, PvalueRR = regression.fnRANSACRegressor(yearList, avgTempList, predictYear)
        scoreGP, PvalueGP = regression.fnGaussianProcessRegressor(yearList, avgTempList, predictYear)
        scoreSV, PvalueSV = regression.fnSVR(yearList, avgTempList, predictYear)
        score = np.array([scoreReg, scoreIso, scoreBR, scoreRR, scoreGP, scoreSV])
        pValue = np.array([PvalueReg, PvalueIso, PvalueBR, PvalueRR, PvalueGP, PvalueSV])
        # NOTE: the original filtered pValue first and then reused the shrunken mask on
        # score, misaligning the two arrays; compute the mask once instead.
        not_nan = np.logical_not(np.isnan(pValue))
        pValue = pValue[not_nan]
        score = score[not_nan]
        maxScoreIndex = np.argmax(score)
        return dumps({"avgTemp": pValue[maxScoreIndex]})
    except:
        return "error"

if __name__ == '__main__':
    global dataFrame
    global regression
    global classification
    dataFrame = DataParser()
    regression = Regression()
    classification = Classification()
    app.run()
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re
import time
import numpy as np
from sklearn import datasets
# sklearn.cross_validation in the original; the module was renamed in scikit-learn 0.18+
from sklearn.model_selection import train_test_split

from classification import Classification

if __name__ == "__main__":
    print('iris data')
    iris = datasets.load_iris()
    # split into training and test data at a 3:1 ratio
    train, test, train_label, test_label = train_test_split(iris.data, iris.target,
                                                            test_size=0.25, random_state=0)
    clf = Classification(train, train_label)  # set the training data
    clf.set_test(test, test_label)            # set the test data
    clf.svm_gridsearch(5)                     # grid search with 5-fold cross-validation
    clf.cv(5)                                 # show 5-fold cross-validation results
    print("test Result")
    clf.prediction()                          # show predictions on the test data
    for i, c in enumerate(self.classes):
        # j renamed from i in the original, which shadowed the outer loop variable
        X_c = np.array([self.X[j] for j, yy in enumerate(self.y) if yy == c])
        X_c_num = X_c.shape[0]
        X_c_mean = np.mean(X_c, axis=0)
        X_c_var = np.var(X_c, axis=0)
        self.parameter[c] = (X_c_num, X_c_mean, X_c_var)

def predict(self, X):
    # NOTE: the original added the raw class count to the log-likelihood; the log of
    # the count is the correct log-prior term (up to a constant that argmax ignores).
    posterior = [np.log(self.parameter[c][0]) + self.pdfSum(X, c) for c in self.classes]
    return self.classes[np.argmax(posterior, axis=0)]

def pdfSum(self, X, c):
    mu = self.parameter[c][1]
    sigma = self.parameter[c][2]
    nu = np.exp(-(X - mu) ** 2 / (2 * sigma))
    de = np.sqrt(2 * np.pi * sigma) + 1e-5
    return np.sum(np.log(nu / de), axis=1)

if __name__ == '__main__':
    from classification import Classification

    clf = Classification()
    mm = NaiveBayes()
    mm.fit(clf.X_train, clf.y_train)
    print(mm.predict(clf.X_test))
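# A quick sanity check for the hand-rolled Gaussian naive Bayes above: compare it with
# scikit-learn's GaussianNB on a standard dataset. A sketch only -- it assumes fit()
# stores self.X/self.y as arrays and sets self.classes, and agreement will not be exact
# because of the 1e-5 smoothing term and the prior handling:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

X_train, X_test, y_train, y_test = train_test_split(*load_iris(return_X_y=True),
                                                    random_state=0)

mm = NaiveBayes()
mm.fit(X_train, y_train)
ours = mm.predict(X_test)

ref = GaussianNB().fit(X_train, y_train).predict(X_test)
print(np.mean(ours == ref))  # fraction of matching predictions; expected near 1.0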
how="left") # Merge in region codes alpha3_to_region = pd.read_csv("./in/countries_to_regions.csv", dtype={"parent_code": str}) df = df.merge(alpha3_to_region, on="code_alpha3", how="left") # Add custom codes custom_codes = pd.read_csv("./in/custom-codes.csv", dtype={"parent_code": str}) df = pd.concat([df, custom_codes]).reset_index(drop=True) df["level"] = "country" # Add region code level region_codes = pd.read_table("./in/regions.tsv", dtype={"code": str}) region_codes["code_alpha2"] = region_codes["code"] region_codes["code_alpha3"] = region_codes["code"] region_codes["code_numeric"] = region_codes["code"] region_codes = region_codes.drop("code", axis=1) df = pd.concat([df, region_codes]).reset_index(drop=True) h = Hierarchy(["region", "country"]) df["name"] = df["name_en"] df["code"] = df["code_alpha3"] df = parent_code_table_to_parent_id_table(df, h) # Alpha3 classification df["code"] = df["code_alpha3"] Classification(df, h).to_csv("out/locations_international_iso_cid.csv")
hierarchy = pd.read_table("./in/FarmSize_Hierarchy.tsv", encoding="utf-8") hierarchy.columns = ["level1_code", "level0_code"] fields = {"level0": [], "level1": []} h = Hierarchy(["level0", "level1"]) parent_code_table = repeated_table_to_parent_id_table(hierarchy, h, fields) parent_code_table.code = parent_code_table.code.astype(str) parent_code_table = parent_code_table.merge(names, on=["code", "level"]) parent_id_table = parent_code_table_to_parent_id_table( parent_code_table, h) parent_id_table["name"] = parent_id_table.name_en parent_id_table = parent_id_table[[ "code", "name", "level", "name_en", "name_es", "name_short_en", "name_short_es", "parent_id", ]] c = Classification(parent_id_table, h) c.to_csv("out/farm_size.csv") c.to_stata("out/farm_size.dta")
"latitude", "longitude", "altitude", "map_code", "ambito", "population_total", "population_male", "population_female", "dwellings_occupied"] df = df[["state_code", "state_name", "municipality_code", "municipality_name", "locality_code", "locality_name"]] df.state_code = df.state_code.astype(str).str.zfill(2) df.municipality_code = df.municipality_code.astype(str).str.zfill(3) df.locality_code = df.locality_code.astype(str).str.zfill(4) df.municipality_code = df.state_code + df.municipality_code df.locality_code = df.municipality_code + df.locality_code df.state_name = df.state_name.str.title() df.municipality_name = df.municipality_name.str.title() df.locality_name = df.locality_name.str.title() h = Hierarchy(["state", "municipality", "locality"]) parent_code_table = repeated_table_to_parent_id_table(df, h) parent_id_table = parent_code_table_to_parent_id_table(parent_code_table, h) c = Classification(parent_id_table, h) c.to_csv("out/locations_mexico_inegi.csv") c.to_stata("out/locations_mexico_inegi.dta")
def classify(self):
    print("\nclassify")
    self.process_classification_data()
    self.split_data(0.7)
    c = Classification(self.training_data, self.test_data)
    c.decision_tree_classifier()
    c.random_forest_classifier()
    c.naive_bayes()
    c.logistic_regression()
    c.gbtc()
    c.lsvc()
if __name__ == "__main__": assert(len(sys.argv) == 3) file_name = sys.argv[1] new_file_prefix = sys.argv[2] df = pd.read_table(file_name, encoding="utf-16") df = parse_dane(df) df = df[~df.duplicated(["code"])] df = df.reset_index(drop=True) df.columns = ["name", "level", "code"] df.name = df.name.str.title() from classification import (parent_code_table_to_parent_id_table, Classification, Hierarchy, ordered_table_to_parent_code_table) h = Hierarchy(DANE_HIERARCHY) df = ordered_table_to_parent_code_table(df, h) df = parent_code_table_to_parent_id_table(df, h) c = Classification(df, h) # weird bug where pandas infer_type was returning mixed instead of string c.table.code = c.table.code.astype(str) c.to_csv(new_file_prefix + ".csv") c.to_stata(new_file_prefix + ".dta")
df = (pd.read_csv(
    "./in/Mexico Country codes - continents - Countries.csv",
    encoding="utf-8",
    dtype={"continent_code": str},
).rename(columns={"continent_code": "parent_code"}).drop("total_export", axis=1))
df["level"] = "country"

regions = pd.read_table(
    "./in/Mexico Country codes - continents - Continents - Regions.tsv",
    encoding="utf-8",
).rename(columns={"name": "name_en"})
regions["name_short_en"] = regions["name_en"]
regions["name_short_es"] = regions["name_es"]
regions["level"] = "region"
regions["code"] = regions["code"].astype(str)  # `unicode` in the original (Python 2)

df = pd.concat([df, regions]).reset_index(drop=True)

h = Hierarchy(["region", "country"])
parent_id_table = parent_code_table_to_parent_id_table(df, h)
parent_id_table["name"] = parent_id_table["name_en"]

c = Classification(parent_id_table, h)
c.to_csv("out/locations_international_mexico.csv")
c.to_stata("out/locations_international_mexico.dta")
if __name__ == "__main__": names = pd.read_table( "./in/AgProducts_Expanded_Names.tsv", encoding="utf-8", dtype={"code": str} ) hierarchy = pd.read_table( "./in/AgProducts_Expanded_Hierarchy.tsv", encoding="utf-8" ) hierarchy.columns = ["level3_code", "level2_code", "level1_code", "level0_code"] fields = {"level0": [], "level1": [], "level2": [], "level3": []} h = Hierarchy(["level0", "level1", "level2", "level3"]) parent_code_table = repeated_table_to_parent_id_table(hierarchy, h, fields) parent_code_table.code = parent_code_table.code.astype(str) parent_code_table = parent_code_table.merge(names, on=["code", "level"]) parent_id_table = parent_code_table_to_parent_id_table(parent_code_table, h) parent_id_table["name"] = parent_id_table.name_en parent_id_table = parent_id_table[ ["code", "name", "level", "name_en", "name_es", "parent_id"] ] c = Classification(parent_id_table, h) c.table.code = c.table.code.str.lower() c.to_csv("out/agricultural_products_expanded.csv") c.to_stata("out/agricultural_products_expanded.dta")
def test_sigmoid_vector(self):
    small_sig = Classification._sigmoid(np.array([-10000, -10000, -10000]))
    large_sig = Classification._sigmoid(np.array([10000, 10000, 10000]))
    for s_val, l_val in zip(small_sig, large_sig):
        self.assertAlmostEqual(s_val, 0)
        self.assertAlmostEqual(l_val, 1)
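# A minimal sketch of what a _sigmoid helper like the one exercised above
# might look like (an assumption, not the project's actual implementation).
# Clipping keeps np.exp from overflowing at extreme inputs such as +/-10000.
import numpy as np

def sigmoid(z):
    # sigmoid saturates to 0/1 well before +/-500, so clipping is lossless here
    z = np.clip(z, -500, 500)
    return 1.0 / (1.0 + np.exp(-z))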
import argparse

from flask import Flask, request, jsonify

from classification import Classification

FLAGS = None
BOT = None

# Start web server
application = Flask(__name__)


@application.route('/chat', methods=['POST'])
def chat():
    text = request.get_data(as_text=True)
    result = BOT.handle(text)
    return jsonify(result)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int, default=8080,
                        help='Port for http server to listen on.')
    FLAGS, unparsed = parser.parse_known_args()

    # Creates NLP chat bot.
    BOT = Classification()
    application.run(host='0.0.0.0', port=FLAGS.port)
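# Hypothetical client call against the /chat endpoint above, using the
# third-party requests package. The endpoint reads the raw request body as
# text, so the message goes in as plain data rather than JSON.
import requests

resp = requests.post('http://localhost:8080/chat', data='hello there')
print(resp.json())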
def test_grad(self):
    # The exact gradient values are hard to assert, but the result should be
    # a vector with one entry per feature (plus one for the intercept).
    theta_z = pd.Series(np.zeros(len(self.X.columns)))
    clsfy = Classification(self.X, self.y)
    grad = clsfy._gradient(theta_z)
    self.assertEqual(len(grad), len(theta_z))
    print(grad)
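# Background sketch (standard logistic regression, assumed to be what the
# Classification class implements): the gradient of the cross-entropy cost is
#   grad = X^T (sigmoid(X theta) - y) / m
# which has the same length as theta, one entry per feature/intercept column.
import numpy as np

def gradient(theta, X, y):
    m = len(y)
    preds = 1.0 / (1.0 + np.exp(-X @ theta))  # sigmoid of the linear scores
    return X.T @ (preds - y) / m              # shape: (n_features,)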
def load_faces_data(datapath):
    # Each subfolder of datapath holds the jpg face crops for one identity;
    # the label is that identity's index in the folder listing.
    X, y = [], []
    ids = os.listdir(datapath)
    for label, person in enumerate(ids):
        idpath = os.path.join(datapath, person)
        for face_file in os.listdir(idpath):
            if face_file.split('.')[-1] != 'jpg':
                continue
            img = cv2.imread(os.path.join(idpath, face_file))
            img = cv2.resize(img, input_shape)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            X.append(img)
            y.append(label)
    X = np.asarray(X).astype('float32')
    y = np.asarray(y)
    X /= 255
    return X, y


input_shape = (180, 180)
num_classes = 7

x_train, y_train = load_faces_data('/data_out/siamese_faces/train')
x_test, y_test = load_faces_data('/data_out/siamese_faces/test')

siam = Siamese(x_train, y_train, x_test, y_test, input_shape, num_classes)
siam.train(epochs=5)

classifier = Classification(x_train, y_train, x_test, y_test, input_shape, num_classes)
classifier.train(epochs=5)
class GridSearch(object):
    def __init__(self, dataset_lists, feature="edge_hist",
                 learning_model="dnn", score_type="all"):
        self.feature = feature
        self.learning_model = learning_model
        self.score_type = score_type
        self.best_accuracy = 0
        self.best_recall = 0
        self.best_params = {}
        self.best_confusion_matrix = 0
        self.clf = Classification(dataset_lists, feature, learning_model,
                                  output_every_scores=False)

    def run(self):
        print(f"{'='*30} grid search {'='*30}")
        print(f"{'='*10} feature: {self.feature}, "
              f"model: {self.learning_model}, "
              f"score_type: {self.score_type} {'='*10}")
        if self.learning_model == "dnn":
            self._dnn_grid_search()
        elif self.learning_model == "svm":
            self._svm_grid_search()
        self._output_best_scores()

    def _dnn_grid_search(self):
        for alpha in ALPHA:
            self.params = {
                "hidden_layer_sizes": HIDDEN_LAYER_SIZES,
                "alpha": alpha,
                "max_iter": MAX_ITER,
                "random_state": RANDOM_STATE,
            }
            self.grid_search()

    def _svm_grid_search(self):
        for c in PARAMS:
            for gamma in PARAMS:
                self.params = {
                    "C": c,
                    "gamma": gamma,
                    "decision_function_shape": DECISION_FUNCTION_SHAPE,
                    "random_state": RANDOM_STATE,
                }
                self.grid_search()

    def grid_search(self):
        self.accuracy, self.recall, self.confusion_matrix = \
            self.clf.train_and_test(**self.params)
        if self.score_type == "all":
            self._update_best_scores()
        elif self.score_type == "fake":
            self._update_best_fake_scores()

    # Prioritize overall accuracy
    def _update_best_scores(self):
        if self.accuracy > self.best_accuracy:
            self._update_scores()

    # Prioritize recall (detection rate)
    def _update_best_fake_scores(self):
        if self.recall > self.best_recall:
            self._update_scores()

    def _update_scores(self):
        self.best_accuracy = self.accuracy
        self.best_recall = self.recall
        self.best_params = self.params
        self.best_confusion_matrix = self.confusion_matrix

    def _output_best_scores(self):
        print(f"best params: {self.best_params}")
        print(f"best accuracy: {self.best_accuracy * 100} %")
        print(f"best recall: {self.best_recall * 100} %")
        print(self.best_confusion_matrix)
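# Hypothetical driver for the GridSearch class above; `my_datasets` stands in
# for whatever list of datasets this project's Classification expects.
my_datasets = []  # placeholder: project-specific dataset lists

search = GridSearch(my_datasets, feature="edge_hist",
                    learning_model="svm", score_type="fake")
search.run()  # prints the best params, accuracy, recall and confusion matrix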
import pandas as pd

from classification import (Hierarchy, ordered_table_to_parent_code_table,
                            parent_code_table_to_parent_id_table,
                            Classification)

if __name__ == "__main__":
    sinco = pd.read_csv("in/SINCO_2011.csv", header=None, encoding="latin-1")
    sinco.columns = ["data"]

    # Drop source-file headers and footers
    sinco = sinco[~sinco.data.str.startswith("INEGI.")]
    sinco = sinco[~sinco.data.str.startswith(u"Clave Descripción")]

    # Lines that don't start with a code are continuations of the previous line
    for index, row in reversed(list(sinco[~sinco.data.str.match(r"^\d* ")].iterrows())):
        sinco.loc[index - 1, "data"] += " " + sinco.loc[index, "data"]
    sinco = sinco[sinco.data.str.match(r"^\d* ")]

    sinco = sinco.data.str.split(" ", n=1, expand=True)
    sinco.columns = ["code", "name"]
    sinco["level"] = sinco["code"].apply(lambda x: str(len(x)) + "digit")

    h = Hierarchy(["1digit", "2digit", "3digit", "4digit"])
    parent_code_table = ordered_table_to_parent_code_table(sinco, h)
    parent_id_table = parent_code_table_to_parent_id_table(parent_code_table, h)

    c = Classification(parent_id_table, h)
    c.to_csv("out/occupations_sinco_2011.csv")
    c.to_stata("out/occupations_sinco_2011.dta")
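# Toy illustration of the bottom-up continuation merge used above: rows whose
# text does not start with a code are glued onto the previous row, iterating
# in reverse so earlier indices are still valid when reached.
import pandas as pd

toy = pd.DataFrame({"data": ["11 Directors", "12 Managers of", "administrative services"]})
for index, row in reversed(list(toy[~toy.data.str.match(r"^\d+ ")].iterrows())):
    toy.loc[index - 1, "data"] += " " + toy.loc[index, "data"]
toy = toy[toy.data.str.match(r"^\d+ ")]
print(toy.data.tolist())  # ['11 Directors', '12 Managers of administrative services']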
df.parent_id = df.parent_id.astype(float)

h = Hierarchy(["country", "department", "msa", "municipality"])
df.level = df.level.astype(pd.CategoricalDtype(categories=h, ordered=True))
df.level = df.level.astype(str)

# Drop old Callao department and province
# Do this after reset_index to not mess up the id order
df = df[df.code != "070000"]
df = df[df.code != "070100"]

# Order the columns
df = df[
    [
        "code", "name", "level",
        "name_es", "name_en",
        "name_short_es", "name_short_en",
        "parent_id",
    ]
]

c = Classification(df, h)
c.to_csv("out/locations_peru_datlas.csv")
c.to_stata("out/locations_peru_datlas.dta")
df = pd.read_table("in/DIVIPOLA_20150331.txt", encoding="utf-16") df.columns = ["department_code", "municipality_code", "population_center_code", "department_name", "municipality_name", "population_center_name", "population_center_type", "longitude", "", "latitude", "district", "municipality_type", "metro_area"] df = df[["department_code", "department_name", "municipality_code", "municipality_name", "population_center_code", "population_center_name"]] df.department_code = df.department_code.astype(str).str.zfill(2) df.municipality_code = df.municipality_code.astype(str).str.zfill(5) df.population_center_code = df.population_center_code.astype(str).str.zfill(8) df.department_name = df.department_name.str.title() df.municipality_name = df.municipality_name.str.title() df.population_center_name = df.population_center_name.str.title() h = Hierarchy(["department", "municipality", "population_center"]) parent_code_table = repeated_table_to_parent_id_table(df, h) parent_id_table = parent_code_table_to_parent_id_table(parent_code_table, h) c = Classification(parent_id_table, h) c.to_csv("out/locations_colombia_dane.csv") c.to_stata("out/locations_colombia_dane.dta")
hierarchy.columns = ["level2_code", "level1_code", "level0_code"] fields = {"level0": [], "level1": [], "level2": []} h = Hierarchy(["level0", "level1", "level2"]) parent_code_table = repeated_table_to_parent_id_table(hierarchy, h, fields) parent_code_table.code = parent_code_table.code.astype(str) parent_code_table = parent_code_table.merge(names, on=["code", "level"]) parent_id_table = parent_code_table_to_parent_id_table(parent_code_table, h) parent_id_table["name"] = parent_id_table.name_en parent_id_table = parent_id_table[ [ "code", "name", "level", "name_en", "name_es", "name_short_en", "name_short_es", "parent_id", ] ] c = Classification(parent_id_table, h) c.to_csv("out/land_use.csv") # c.to_stata("out/land_use.dta")
# Replace trailing comma and space
df.name_spanish = df.name_spanish.str.replace(", $", "")
df.name_english = df.name_english.str.replace(", $", "")

h = Hierarchy(["twodigit", "threedigit", "fourdigit", "fivedigit", "sixdigit"])

df.loc[df.code.str.len() == 2, "level"] = "twodigit"
df.loc[df.code.str.len() == 3, "level"] = "threedigit"
df.loc[df.code.str.len() == 4, "level"] = "fourdigit"
df.loc[df.code.str.len() == 5, "level"] = "fivedigit"
df.loc[df.code.str.len() == 6, "level"] = "sixdigit"

spanish = df[["code", "level", "name_spanish"]]
spanish.columns = ["code", "level", "name_es"]

# make sure this is the hand-fixed version
assert df.loc[304, "code"] == "31"

df = df[["code", "name_english", "level"]]
df.columns = ["code", "name", "level"]

parent_code_table = ordered_table_to_parent_code_table(df, h)
parent_id_table = parent_code_table_to_parent_id_table(parent_code_table, h)
parent_id_table = parent_id_table.merge(spanish, on=["level", "code"])

c = Classification(parent_id_table, h)
c.to_csv("out/industries_mexico_scian_2007.csv")
c.to_stata("out/industries_mexico_scian_2007.dta")
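# Illustrative sketch (my assumption about the semantics, not the library's
# code): in an ordered table like the SCIAN one above, a row's parent is the
# most recent earlier row whose level sits one step higher in the hierarchy,
# e.g. the last seen twodigit row parents the threedigit rows that follow it.
import pandas as pd

levels = ["twodigit", "threedigit"]
toy = pd.DataFrame({
    "code": ["31", "311", "312", "32", "321"],
    "level": ["twodigit", "threedigit", "threedigit", "twodigit", "threedigit"],
})
last_seen = {}
parents = []
for _, row in toy.iterrows():
    depth = levels.index(row["level"])
    parents.append(last_seen.get(depth - 1))  # None for top-level rows
    last_seen[depth] = row["code"]
toy["parent_code"] = parents
print(toy)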
'''
A few notes on predict.py:
1. It does not do batch prediction. If you want batch prediction, use
   os.listdir() to walk a folder and open each image with Image.open for
   prediction.
2. If you want to save the results to a txt file, open the file with open()
   and use its write method; see txt_annotation.py for reference.
'''
from PIL import Image

from classification import Classification

classification = Classification()

while True:
    img = input('Input image filename:')
    try:
        image = Image.open(img)
    except Exception:
        print('Open Error! Try again!')
        continue
    else:
        class_name = classification.detect_image(image)
        print(class_name)
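# A minimal batch-prediction sketch following note 1 above: walk a folder
# with os.listdir() and classify each image. The folder path and results
# file name are placeholders.
import os

from PIL import Image
from classification import Classification

classification = Classification()
img_dir = 'imgs'  # placeholder: folder of images to classify

with open('results.txt', 'w') as f:
    for name in os.listdir(img_dir):
        if not name.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue
        image = Image.open(os.path.join(img_dir, name))
        class_name = classification.detect_image(image)
        f.write('%s %s\n' % (name, class_name))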
import sys
from optparse import OptionParser


def main():
    parser = OptionParser()
    parser.add_option("--stemmer-language", dest="stemmer_language",
                      help="Language for SnowballStemmer", default="english")
    parser.add_option('-i', action="store_true", dest="ignore_stopwords_stemmer",
                      help="Ignore stopwords in stemmer, default false", default=False)
    parser.add_option("--stopwords-language", dest="stopwords_language",
                      help="Language for stopwords")
    parser.add_option("-k", action="store_true", dest="keep_stopwords",
                      help="Keep stopwords, default remove", default=False)
    parser.add_option('--load-classifier', dest="load_classifier_file_path",
                      help="Specify load classifiers file")
    parser.add_option('--create-classifier', dest="create_classifier",
                      help="File for training set")
    parser.add_option('--row-training-set', dest="row_training_set",
                      help="Number of rows for training set", default=1000)
    parser.add_option('-r', action="store_true", dest="random_row_training_set",
                      help="Get random rows from training set file", default=False)
    parser.add_option('--text-field', dest="text_field",
                      help="Text field in json file", default="text")
    parser.add_option('--word-tokenize-language', dest="word_tokenize_language",
                      help="Word tokenize language", default="english")
    parser.add_option('--classification-field', dest="classification_field",
                      help="Classification field in json data", default="category")
    parser.add_option('--dump-classifier', dest="dump_classifier",
                      help="Dump classifier file", default=False)
    parser.add_option('-a', action="store_true", dest="calculate_accuracy",
                      help="Calculate accuracy", default=False)
    parser.add_option('--test-file-path', dest="test_file_path",
                      help="Test file path")
    parser.add_option('--row-test-set', dest="row_test_set",
                      help="Number of rows for test set", default=500)
    parser.add_option('--random-row-test-set', action="store_true",
                      dest="random_row_test_set",
                      help="Get random rows from test set file", default=False)
    parser.add_option('--test-text-field', dest="test_text_field",
                      help="Text field in json test file", default="text")
    parser.add_option('--test-classification-field', dest="test_classification_field",
                      help="Classification field in json test file", default="category")
    parser.add_option('--classify', dest="classify_text",
                      help="Classify text", default=False)

    (options, args) = parser.parse_args(sys.argv)

    cl = Classification(
        stemmer_language=options.stemmer_language,
        stopwords_language=options.stopwords_language,
        ignore_stopwords_stemmer=options.ignore_stopwords_stemmer,
    )

    if options.load_classifier_file_path:
        cl.load_classifier(load_classifier_file_path=options.load_classifier_file_path)
    elif options.create_classifier:
        cl.create_and_train_classifier(
            training_file_path=options.create_classifier,
            keep_stopwords=options.keep_stopwords,
            row_training_set=options.row_training_set,
            random_row_training_set=options.random_row_training_set,
            text_field=options.text_field,
            word_tokenize_language=options.word_tokenize_language,
            classification_field=options.classification_field
        )

    if options.dump_classifier:
        cl.dump_classifier(options.dump_classifier)

    if options.calculate_accuracy:
        cl.accuracy(
            test_file_path=options.test_file_path,
            keep_stopwords=options.keep_stopwords,
            row_test_set=options.row_test_set,
            random_row_test_set=options.random_row_test_set,
            text_field=options.test_text_field,
            word_tokenize_language=options.word_tokenize_language,
            classification_field=options.test_classification_field
        )

    if options.classify_text:
        cl.classify(
            text=options.classify_text,
            keep_stopwords=options.keep_stopwords,
            word_tokenize_language=options.word_tokenize_language
        )
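# Hypothetical invocations of the CLI above (the script name is a placeholder;
# the option names come from the parser):
#   python main.py --create-classifier train.json --dump-classifier model.pickle
#   python main.py --load-classifier model.pickle --classify "some text to classify"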
trans = trans.apply(fill_code, axis=1)

# Prospedia specific
trans = trans[trans.level != "section"]

df = pd.read_table("./in/prospedia_hs_structure.txt")
df.columns = ["4digit_code", "2digit_code", "prospedia_section_code"]
df["4digit_code"] = df["4digit_code"].astype(str).str.zfill(4)
df["4digit_name"] = None
df["2digit_code"] = df["2digit_code"].astype(str).str.zfill(2)
df["2digit_name"] = None
df["prospedia_section_name"] = None
df["prospedia_section_code"] = df["prospedia_section_code"].astype(str).str.zfill(1)

h = Hierarchy(["prospedia_section", "2digit", "4digit"])
parent_code_table = repeated_table_to_parent_id_table(df, h)
parent_id_table = parent_code_table_to_parent_id_table(parent_code_table, h)
parent_id_table = parent_id_table.merge(trans, on=["level", "code"])

parent_id_table.name = parent_id_table.name_en
assert parent_id_table.name.isnull().sum() == 3
parent_id_table.loc[parent_id_table.name.isnull(), "name"] = u"No name"
assert parent_id_table.name.isnull().sum() == 0

c = Classification(parent_id_table, h)
c.to_csv("out/products_mexico_prospedia.csv")
c.to_stata("out/products_mexico_prospedia.dta")