def simpleclassify(self, auxiliar=None):
    Classifier.simpleclassify(self, auxiliar)
    vfte = map(SVM._featureVectorFromListToDict, self._vfte)
    vlte = [label if label is not None else self._unknown_label
            for label in self._vlte]
    vlpr, apr, vpr = self._prediction_function(vlte, vfte, self._model, ' -q ')
    vlpr = [label if label != self._unknown_label else None
            for label in vlpr]
    if auxiliar is not None:
        for x in vpr:
            auxiliar.append(x)
    assert len(vlpr) == len(vfte)
    del vfte
    del vlte
    return vlpr
def built_model(model_type):
    dataset.table_bert = False
    if model_type == 'qdlstm':
        embedder = CellEmbedder(indexer)
        pixelrnn = PixelRNN(embedder._output_dim, table_size_limit,
                            hidden_size=64, num_lstm_layers=1, classifier=True)
        model = Classifier(embedder, pixelrnn)
    elif model_type == 'resnet':
        embedder = CellEmbedder(indexer)
        model = TBResNet18(embedder._output_dim, dropout=dropout)
        model = Classifier(embedder, model)
    elif model_type == 'tabnet':
        embedder = CellEmbedder(indexer, char_emb=False, attention_dim=-1)
        model = TabNet(embedder._output_dim, table_size_limit)
        model = Classifier(embedder, model)
    else:
        embedder = CellEmbedder(indexer, hidden_size=hidden_size)
        pixelrnn = PixelRNN(embedder._output_dim, table_size_limit,
                            hidden_size=hidden_size, num_lstm_layers=1,
                            classifier=False)
        resnet = TBResNet18(pixelrnn._output_dim, crossconv=table_size_limit,
                            dropout=dropout)
        # tabnet = TabNet(pixelrnn._output_dim, table_size_limit)
        combined = torch.nn.Sequential(
            pixelrnn,
            resnet,
        )
        model = Classifier(embedder, combined)
    return model
def run_classification_test():
    corpus_id = SessionConfigReader.read_value(SetupRunner.corpus_id_key)
    vectorized_df_id = corpus_id + SetupRunner.ext_vectorized
    train_df_id = vectorized_df_id + SetupRunner.ext_train
    test_df_id = vectorized_df_id + SetupRunner.ext_test
    Storage.delete_pd_frame(train_df_id)
    Storage.delete_pd_frame(test_df_id)
    Storage.delete_h5_model(SessionConfigReader.read_value(SetupRunner.keras_nn_model_id_key))
    vectorized_df = Storage.load_pd_frame(vectorized_df_id)
    TrainTestSplitter.split_train_test(identifier=vectorized_df_id, data_frame=vectorized_df)
    train = Storage.load_pd_frame(train_df_id)
    test = Storage.load_pd_frame(test_df_id)
    train_classification_outs = ClassificationInterpreter.create_out_vectors(train)
    Classifier.create_model(train_classification_outs)
    test_classified = Classifier.classify(test)
    test_interpreted = ClassificationInterpreter.interpret_output(test_classified)
    score = ClassificationInterpreter.evaluate_output(test_interpreted)
    EvaluationHandler.add_evaluation(score)
    return test_interpreted
def run_OMR(inputPath, classifiersPath):
    image, useAugmented = Preprocessing.read_and_preprocess_image(inputPath)
    Processing = Pipeline.Augmented if useAugmented else Pipeline.Standard
    Classifier.load_classifiers(classifiersPath)
    image = Processing.remove_brace(image)
    lineImage, staffDim = Processing.extract_staff_lines(image)
    groups = Processing.split_bars(image, lineImage, staffDim)
    output = []
    for group in groups:
        components, sanitized, staffDim, lineImage, dotBoxes = Processing.segment_image(group)
        Classifier.assign_components(sanitized, components, staffDim)
        Processing.join_meters(components)
        Processing.bind_accidentals_to_following_notes(components)
        Processing.bind_dots_to_notes(components, dotBoxes)
        Processing.assign_note_tones(components, sanitized, lineImage, staffDim, group)
        output.append(Display.get_guido_notation(components))
    return output
def __init__(self, descriptors, labels, method='kmeans50'):
    self.descriptors = descriptors
    self.cluster_centers = self.compute_cluster_centers(method)
    self.nearest_neighbors = NearestNeighbors(algorithm='auto')
    self.nearest_neighbors.fit(self.cluster_centers)
    self.codebook_histograms = self.calculate_histograms(self.descriptors)
    self.classifier = Classifier(self.codebook_histograms, labels)
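# A minimal, self-contained sketch of the bag-of-visual-words step the
# constructor above appears to implement: cluster local descriptors into a
# codebook, then describe each image by a histogram of nearest codewords.
# All names and sizes below are illustrative, not taken from the original
# project.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors

rng = np.random.default_rng(0)
descriptors = rng.normal(size=(200, 16))          # stand-in local descriptors

kmeans = KMeans(n_clusters=8, n_init=10, random_state=0).fit(descriptors)
codebook = kmeans.cluster_centers_

nn = NearestNeighbors(algorithm='auto').fit(codebook)

def histogram(image_descriptors):
    # Assign each descriptor to its nearest codeword and count assignments.
    _, idx = nn.kneighbors(image_descriptors, n_neighbors=1)
    hist = np.bincount(idx.ravel(), minlength=len(codebook))
    return hist / hist.sum()                      # normalize per image

print(histogram(descriptors[:50]))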
def simple__init__(self):
    """ bc: binary classifier """
    Classifier.simple__init__(self)
    assert self._parameters.has_key('bc')
    self._bc = self._parameters['bc']
    self._approach = (self._parameters['approach']
                      if self._parameters.has_key('approach') else 'OVA')
    # self._ovaonlyone - "OVA only one": when True, one and only one binary
    # classifier must vote positive for the MCBB classifier to output one of
    # the available classes; otherwise the MCBB classifier outputs unknown.
    self._ovaonlyone = (self._parameters['ovaonlyone']
                        if self._parameters.has_key('ovaonlyone') else False)
    self._gridsearch = False
    assert self._approach in ['OVO', 'OVA']
def classify(self, args, filename):
    # If the user is running only the classifying module,
    # the filename is the last positional argument.
    if filename == "":
        filename = args[-1]
    if "-c" in args:
        classifier = Classifier(filename)
        classifier.classify()
def __init__(self, parent=None, is_good_kid=False, arch_code_len=0, is_root=False):
    # Note: every node is initialized as a leaf;
    # only internal nodes are equipped with classifiers to make decisions.
    if not is_root:
        assert type(parent) == type(self)
    self.is_root = is_root
    self.ARCH_CODE_LEN = arch_code_len
    self.x_bar = float("inf")
    self.n = 0
    self.classifier = Classifier({}, self.ARCH_CODE_LEN)
    self.parent = parent
    self.is_good_kid = is_good_kid
    self.uct = 0
    # insert current node into the kids of its parent
    if parent is not None:
        self.parent.kids.append(self)
        if self.parent.is_leaf == True:
            self.parent.is_leaf = False
        assert len(self.parent.kids) <= 2
    self.kids = []
    self.bag = {}
    # data for good and bad kids, respectively
    self.good_kid_data = {}
    self.bad_kid_data = {}
    self.is_leaf = True
    self.id = Node.obj_counter
    Node.obj_counter += 1
def simplefit(self):
    Classifier.simplefit(self)
    assert hasattr(self, '_occ')
    assert self._unknown_label not in self._vltr
    self._one_class_classifiers = []
    for clss in self._acs:
        if self._allsamplesgs:
            vl = map(lambda l: (l if l == clss else -l), self._vltr)
            occ = self._occ.copy()
            occ.fit(self._vftr, vl)
        else:
            vf, vl = map(list, zip(*[(f, l)
                                     for f, l in zip(self._vftr, self._vltr)
                                     if l == clss]))
            occ = self._occ.copy()
            occ.fit(vf, vl)
        self._one_class_classifiers.append(occ)
class Model(object):
    def __init__(self, detector_window, classifier_window):
        self.detector_window = detector_window
        self.classifier_window = classifier_window
        self.detector = Detector()
        self.detector.load()
        self.classifier = Classifier()
        self.classifier.load()
        self.gesture_sequence = Gesture()

    def __call__(self, hand_landmarks):
        self.gesture_sequence.push(hand_landmarks)
        if len(self.gesture_sequence) > self.classifier_window:
            self.gesture_sequence.drop_first()
        tail_detect_vector = self.gesture_sequence.data(-self.detector_window)
        detector_predict = self.detector.predict(tail_detect_vector)
        if detector_predict:
            tail_classify_vector = self.gesture_sequence.data(-self.classifier_window)
            classifier_predict = self.classifier.predict(tail_classify_vector)
            if classifier_predict != 'No gesture':
                return classifier_predict
        return None
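# A self-contained sketch of the sliding-window logic in Model.__call__ above:
# keep at most classifier_window frames, and hand the detector only the most
# recent detector_window of them. The deque stands in for the Gesture class;
# the integer t stands in for a landmark frame.
from collections import deque

classifier_window, detector_window = 6, 3
frames = deque(maxlen=classifier_window)   # drops the oldest frame automatically

for t in range(10):
    frames.append(t)
    tail_detect = list(frames)[-detector_window:]
    print(t, list(frames), tail_detect)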
def start(self):
    accuracy = -1.0
    while accuracy < self.epsilon:
        self.population.evaluate(self.inputTraining, self.outputTraining)
        for index in range(self.iterations):
            print("Iteration: " + str(index))
            self.iteration(index)
            self.population.evaluate(self.inputTraining, self.outputTraining)
        self.population.selection(self.nrOfIndividuals)
        best = self.population.best(1)[0]
        count = 0
        print("Training accuracy", best.fitness, "%")
        for index in range(len(self.inputTesting)):
            prediction = Classifier.classify(best.predict(self.inputTesting[index]))
            if best.fitness == 100:
                print(str(prediction) + " == " + str(self.outputTesting[index]))
            if prediction == self.outputTesting[index]:
                count += 1
        accuracy = float(count / len(self.inputTesting) * 100)
        print("Current accuracy: ", accuracy, "%")
    print("Best: " + str(best.root))
    print("Correct guesses ", count)
    print("That is ", accuracy, "% accuracy")
    with open(self.outputFilename, "w") as f:
        f.write("Correct guesses " + str(count))
        f.write("\nThat is " + str(accuracy) + "%")
def expected_case(cli: Classifier, percept: list) -> Classifier:
    """
    :rtype: Classifier
    """
    diff = get_differences(cli.mark, percept)
    if diff == [cons.symbol] * cons.lenCondition:
        cli.q += cons.beta * (1 - cli.q)
        return None
    spec = number_of_spec(cli.condition)
    spec_new = number_of_spec(diff)
    child = Classifier(cli)
    if spec == cons.uMax:
        remove_random_spec_att(child.condition)
        spec -= 1
        while spec + spec_new > cons.beta:
            if spec > 0 and random() < 0.5:
                remove_random_spec_att(child.condition)
                spec -= 1
            else:
                remove_random_spec_att(diff)
                spec_new -= 1
    else:
        while spec + spec_new > cons.beta:
            remove_random_spec_att(diff)
            spec_new -= 1
    child.condition = diff
    if child.q < 0.5:
        child.q = 0.5
    child.exp = 1
    assert isinstance(child, Classifier), 'Should be a Classifier'
    return child
def DriveFGSM():
    (X_train, y_train), (X_test, y_test) = LoadData(DATA_PATH)
    lqvae = LQVAE(batch_size=BATCH_SIZE,
                  max_iters=MAX_ITERS_LQVAE,
                  latent_dim=LATENT_DIM,
                  learnrate_init=LEARN_RATE_INIT_LQVAE,
                  log_every=LOG_EVERY,
                  save_every=SAVE_EVERY,
                  image_path=IMAGE_PATH)
    classifier = Classifier(batch_size=BATCH_SIZE,
                            max_iters=MAX_ITERS_CLASSIFIER,
                            learnrate_init=LEARN_RATE_INIT_CLASSIFIER,
                            log_every=LOG_EVERY,
                            test_every=TEST_EVERY)
    lqvae.build_model_lqvae()
    classifier.build_model_classifier()
    saver = tf.train.Saver()
    with tf.Session() as session:
        saver.restore(session, MODEL_PATH)
        FGSM(lqvae, classifier, X_test, y_test, session, FGSM_IMAGE_PATH)
def EvaluatePerformance():
    # _, (X, y) = LoadData(DATA_PATH)
    X, y = LoadData(DATA_PATH, file="npy")
    lqvae = LQVAE(batch_size=BATCH_SIZE,
                  max_iters=MAX_ITERS_LQVAE,
                  latent_dim=LATENT_DIM,
                  learnrate_init=LEARN_RATE_INIT_LQVAE,
                  log_every=LOG_EVERY,
                  save_every=SAVE_EVERY,
                  image_path=IMAGE_PATH)
    classifier = Classifier(batch_size=BATCH_SIZE,
                            max_iters=MAX_ITERS_CLASSIFIER,
                            learnrate_init=LEARN_RATE_INIT_CLASSIFIER,
                            log_every=LOG_EVERY,
                            test_every=TEST_EVERY)
    lqvae.build_model_lqvae()
    classifier.build_model_classifier()
    saver = tf.train.Saver()
    with tf.Session() as session:
        saver.restore(session, MODEL_PATH)
        Evaluate(lqvae, classifier, X, y, session)
def CalculateBitFlips():
    X_adv, _ = LoadData(DATA_PATH, file="npy")
    _, (X_test, _) = LoadData(DATA_PATH)
    lqvae = LQVAE(batch_size=BATCH_SIZE,
                  max_iters=MAX_ITERS_LQVAE,
                  latent_dim=LATENT_DIM,
                  learnrate_init=LEARN_RATE_INIT_LQVAE,
                  log_every=LOG_EVERY,
                  save_every=SAVE_EVERY,
                  image_path=IMAGE_PATH)
    classifier = Classifier(batch_size=BATCH_SIZE,
                            max_iters=MAX_ITERS_CLASSIFIER,
                            learnrate_init=LEARN_RATE_INIT_CLASSIFIER,
                            log_every=LOG_EVERY,
                            test_every=TEST_EVERY)
    lqvae.build_model_lqvae()
    classifier.build_model_classifier()
    saver = tf.train.Saver()
    with tf.Session() as session:
        saver.restore(session, MODEL_PATH)
        Bits(lqvae, classifier, X_test, X_adv, session)
def __init__(self, parent=None, dims=0, reset_id=False,
             kernel_type="rbf", gamma_type="auto"):
    # Note: every node is initialized as a leaf;
    # only internal nodes are equipped with classifiers to make decisions.
    # if not is_root:
    #     assert type(parent) == type(self)
    self.dims = dims
    self.x_bar = float('inf')
    self.n = 0
    self.uct = 0
    self.classifier = Classifier([], self.dims, kernel_type, gamma_type)
    # insert current node into the kids of its parent
    self.parent = parent
    self.kids = []  # 0: good, 1: bad
    self.bag = []
    self.is_svm_splittable = False
    if reset_id:
        Node.obj_counter = 0
    self.id = Node.obj_counter
    Node.obj_counter += 1
def simpleclassify(self, auxiliar=None):
    Classifier.simpleclassify(self, auxiliar)
    vfte = map(SVM._featureVectorFromListToDict, self._vfte)
    vlte = [label if label is not None else self._unknown_label
            for label in self._vlte]
    vlpr, apr, vpr = self._prediction_function(vlte, vfte, self._model, ' -q ')
    # Put None whenever the prediction is the unknown label or -1
    # (which also means unknown in SVMSH).
    vlpr = [label if label != self._unknown_label and label != -1 else None
            for label in vlpr]
    if auxiliar is not None:
        # this loop could be replaced by auxiliar.extend(vpr)
        for x in vpr:
            auxiliar.append(x)
    assert len(vlpr) == len(vfte)
    del vfte
    del vlte
    return vlpr
def __init__(self, detector_window, classifier_window):
    self.detector_window = detector_window
    self.classifier_window = classifier_window
    self.detector = Detector()
    self.detector.load()
    self.classifier = Classifier()
    self.classifier.load()
    self.gesture_sequence = Gesture()
def __init__(self):
    # Init basic objects
    self.cropper = Cropper()
    self.extractor = Extractor(self.cropper)
    self.classifier = Classifier(self.extractor.images)
    self.connections = Connections(self.extractor, self.classifier)
    self.visualization = Visualization(self.connections)
def test():
    c = Classifier(None, 10)
    image = cv2.imread("data/orginal/im0002.jpg", 0)  # 0: read as grayscale
    output = c.extractBloodVessels2(image, True)
    cv2.imshow("input", image)
    cv2.imshow("output", output)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def main():
    if not os.path.exists('./gan_img'):
        os.mkdir('./gan_img')
    train_data = torchvision.datasets.MNIST(
        root='./mnist',  # the location to save
        train=True,
        download=DOWNLOAD_MNIST,
        transform=my_transform
    )
    test_data = torchvision.datasets.MNIST(
        root='./mnist',
        train=False,
        download=DOWNLOAD_MNIST,
        transform=my_transform
    )
    train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE,
                                   shuffle=True, num_workers=3)
    test_loader = Data.DataLoader(dataset=test_data, batch_size=BATCH_SIZE,
                                  shuffle=True, num_workers=3)
    if USE_GPU:
        device = torch.device("cuda:" + str(DeviceID[0]))
    else:
        device = torch.device("cpu")

    from Classifier import Classifier
    IC = Classifier()
    IC.load_state_dict(torch.load('c_params.pkl'))
    if USE_GPU:
        IC = nn.DataParallel(IC, device_ids=DeviceID).to(device)

    from A_Encoder import A_Encoder
    A = A_Encoder(use_gpu=USE_GPU)
    A.apply(initialize_weights)
    if USE_GPU:
        A = nn.DataParallel(A, device_ids=DeviceID).to(device)

    from Discriminator import Discriminator
    D = Discriminator()
    D.apply(initialize_weights)
    if USE_GPU:
        D = nn.DataParallel(D, device_ids=DeviceID).to(device)

    from Generator import Generator
    G = Generator(INPUT_DIM)
    G.apply(initialize_weights)
    if USE_GPU:
        G = nn.DataParallel(G, device_ids=DeviceID).to(device)

    A_solver = torch.optim.Adam(A.parameters(), lr=A_LR)
    # IC_solver may not be used in this task
    IC_solver = torch.optim.Adam(IC.parameters(), lr=IC_LR)
    D_solver = torch.optim.Adam(D.parameters(), lr=D_LR, betas=(0.5, 0.999))
    G_solver = torch.optim.Adam(G.parameters(), lr=G_LR, betas=(0.5, 0.999))
    # model.load_state_dict(torch.load('params.pkl', map_location=lambda storage, loc: storage))
    train(IC, D, G, A, IC_solver, D_solver, G_solver, A_solver, device,
          train_loader, EPOCH)
class Tester:
    def __init__(self, input_dir, classifier_dir, debug=False, ext=(".jpg", ".png")):
        self.testImgFolder = input_dir
        self.classifier_dir = classifier_dir
        self.fsh = FileSystemHelper(input_dir, ext)
        self.classifier = Classifier(classifier_dir, debug)
        self.classifier.train()
        self.debug = debug

    def test(self):
        self.classifier.train()
        self.fsh.for_each_file_execute_this(self.callback)

    def callback(self, file_path, file_name):
        original = Image.from_path(file_path)
        to_show = Image(original.img.copy())
        image = Image(original.img.copy()).to_gray().threshold()
        detector = DigitDetector(Image(original.img))
        contours = detector.detect()
        strFinalString = ""
        posFinalString = ""
        sepFinalString = ""
        order = 1
        for contour in contours:
            # Note: the original passed contour.y as the rectangle height;
            # contour.h is almost certainly what was intended.
            to_show.draw_rect((contour.x, contour.y, contour.w, contour.h),
                              (0, 255, 0), 2)
            img_roi = Image(image.img.copy()).crop(contour).resize(
                RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT).inverte()
            npaROIResized = img_roi.vectorize(RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT)
            char_discovered = self.classifier.classify(npaROIResized)
            scale = 3.0 * float(contour.w) / 45.0
            thickness = 3
            text_size = Image.get_text_size(char_discovered, scale, thickness)
            centered_box_x = contour.x + (contour.w - text_size[0]) / 2
            centered_box_y = contour.y + ((contour.h - text_size[1]) / 2) + text_size[1]
            to_show.draw_text(char_discovered, (centered_box_x, centered_box_y),
                              scale, (0, 255, 0), thickness)
            if self.debug:
                strFinalString = strFinalString + char_discovered
                posFinalString = posFinalString + str(order)
                sepFinalString = sepFinalString + "|"
                order = order + 1
        if self.debug:
            print "\n" + posFinalString + "\n" + sepFinalString + "\n" + strFinalString + "\n"
        if to_show.show().wait() == 27:
            if self.debug:
                print "ESC key pressed, exiting"
            sys.exit(0)
def combinations(cls, classifier: Classifier):
    result = []
    name = classifier.get_name()
    attributes = classifier.hyper_parameter_attributes.get_attributes()
    keys, values = zip(*attributes.items())
    for combo in product(*values):
        hyperset = dict(zip(keys, combo))
        result.append(Classifier(name, classifier.classifierType,
                                 HyperParameterAttributes(hyperset)))
    return result
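# A tiny runnable illustration of what combinations() does above: expand a
# dict of hyper-parameter value lists into one dict per point of the
# Cartesian product. The attribute names are made up for the example.
from itertools import product

attributes = {'C': [0.1, 1.0], 'kernel': ['rbf', 'linear']}
keys, values = zip(*attributes.items())
grid = [dict(zip(keys, combo)) for combo in product(*values)]
print(grid)
# [{'C': 0.1, 'kernel': 'rbf'}, {'C': 0.1, 'kernel': 'linear'},
#  {'C': 1.0, 'kernel': 'rbf'}, {'C': 1.0, 'kernel': 'linear'}]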
class Roboy:
    def __init__(self):
        self.im = IM()
        self.cf = CF()
        # self.sm = SM()
        self.FACES_PATH = "faces"
        self.TEMP_PATH = "temp"
        self.sm = SoundModule()
        if not os.path.exists(self.FACES_PATH):
            os.makedirs(self.FACES_PATH)
        if not os.path.exists(self.TEMP_PATH):
            os.makedirs(self.TEMP_PATH)
        print("roboy instance made!")

    def addFaceToMemmory(self, name):
        faceimage = self.im.take_photo()
        [fl, fe, fn] = self.cf.findFaces(faceimage)
        if name == "unknown":
            print("That's a shitty name!")
            return False  # and not impressed
        elif len(fl) > 1:
            print("Too many faces bro!.. give me some air!")
            return False  # and frustrated
        elif len(fl) == 1 and fl[0] == CF.UNKNOWN and fl[0] == name:
            print("You trying to fool me mate? You are not " + name + "! get lost!")
            return False  # and angry
        self.im.save(faceimage, name, self.FACES_PATH)
        self.cf.train(self.FACES_PATH)  # can be optimized - later!
        print("Hello!, nice to meet you " + name)
        return True  # and happy face

    def lookForPersonInImage(self, target_name, image):
        fe, fl, fn = self.cf.findFaces(image)
        for i, name in enumerate(fn):
            if name is not CF.UNKNOWN and name == target_name:
                return fe[i], fl[i], fn[i]  # return the found face
        return None, None, None  # else you found nothing

    def listen(self):
        while True:
            phrase = self.sm.getNextPhrase()
            if "my name is" in phrase:
                name = phrase.rsplit(' ', 1)[1]
                print("name: ", name)
                self.addFaceToMemmory(name)
            if "find " in phrase:
                name = phrase.rsplit(' ', 1)[1]
                x, y, z = self.lookForPersonInImage(name, self.im.take_photo())
                if x is None:
                    print("couldn't find " + name + " sorry!")
                else:
                    print("There is " + name + "!!")
def __init__(self, _data, _trans, _cv):
    Classifier.__init__(self, _cv)
    self.data = _data.copy(deep=True)
    self.names = list(OrderedDict.fromkeys(self.data['CATEGORIA'].values))
    self.y = self.data['CATEGORIA'].astype("category").cat.codes.values
    self.data.drop(['CATEGORIA ESPECIFICA', 'CATEGORIA'], axis=1, inplace=True)
    self.X = self.data.values
def __init__(self, k, mode=0, distanceFunction=None):
    self.k = k
    Classifier.__init__(self)
    self.logger.setDebugLevel(0)
    self.logger.setFileDebugLevel(3)
    self.distances = {}
    self.mode = mode
    self.dist = distanceFunction
    if self.dist is None:
        self.dist = self.calculateDistance
def simple__init__(self):
    Classifier.simple__init__(self)
    assert not self._parameters.has_key('occ')
    self._parameters['occ'] = OCSVM()
    MCOCBClassifier.simple__init__(self)
    self._gridsearch = False
def data_to_classifiers(data, filter_stop_words):
    classifiers = []
    stop_words = get_stop_words(filter_stop_words)
    stemmer = SnowballStemmer("english")
    for classification, text_files in data.items():
        for text in text_files:
            classifier_new = Classifier(classification)
            classifier_new.count_words(stop_words, stemmer, text)
            classifiers.append(classifier_new)
    return classifiers
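# A minimal illustration of the per-class word counting performed above,
# using NLTK's SnowballStemmer directly; the stop-word list is inlined so the
# example runs without extra downloads. Counter stands in for whatever
# count_words stores internally.
from collections import Counter
from nltk.stem import SnowballStemmer

stemmer = SnowballStemmer("english")
stop_words = {"the", "a", "is"}

text = "the runner is running the races"
counts = Counter(stemmer.stem(w) for w in text.split() if w not in stop_words)
print(counts)   # Counter({'runner': 1, 'run': 1, 'race': 1})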
def simple__init__(self):
    Classifier.simple__init__(self)
    assert not self._parameters.has_key('bc')
    self._parameters['bc'] = SVMDBC()
    MCBBClassifier.simple__init__(self)
    self._gridsearch = False
def test(self, cyc, HN, tst, tsExpt, ftest):
    tsPerCyc = self.pm.get("tsPerCyc")
    cls = self.pm.get("cls")
    dvol = self.pm.get("dvol")
    tag = self.pm.get("tag")
    rTime = self.pm.get("rTime")
    sepY = self.pm.get("sepY")
    dThres = self.pm.get("dThres")
    isRecordReact = self.pm.get("isRecordReact")
    isDNATube = self.pm.get("isDNATube")
    clsNum = len(cls)

    # Prepare test tubes
    tsTubes = list()
    for i in range(clsNum):
        c = self.testData[i][1][0][0]
        lblPrf = "ts_cyc" + str(cyc)
        if isDNATube:
            tsTubes.extend(self.makeDNATubesFromData(lblPrf, "B", self.testData[i][0],
                                                     c, tsPerCyc / 2, tst, tsExpt))
        else:
            tsTubes.extend(self.makeTubesFromData(lblPrf, "B", self.testData[i][0],
                                                  c, tsPerCyc / 2, tst, tsExpt))
    random.shuffle(tsTubes)

    # Run test
    testSummary = np.zeros((clsNum, clsNum))
    for i in range(tsPerCyc):
        print("\t\t\t\t%dth test" % (i + 1))
        subHN = [None] * clsNum
        DList = [None] * clsNum
        for j in range(clsNum):
            ts = tsTubes[i]
            subHN[j] = HN[j].copyTube(dvol)
            subHN[j].setLabel("HN" + str(cls[j]) + "_Cycle" + str(cyc + 1) +
                              "_test" + str(i + 1))
            subHN[j].addTube(ts)  # pour test tube
            Operator.reactionSSA(subHN[j], rTime, tag, isRecordReact)  # react
            DList[j] = Operator.separation(subHN[j], sepY)  # electrophoresis
        csf = Classifier()
        if isDNATube:
            score, predict = csf.thresholdClassifyOnDNATube(DList, cls, dThres)
        else:
            score, predict = csf.thresholdClassifyOnTube(DList, cls, dThres)
        testSummary[cls.index(predict)][cls.index(ts.cls)] += 1
        tsTubes[i] = None

    # Save result
    dm.saveTestSummaryLine(ftest, cyc, testSummary)
def __init__(self, input_dir, classifier_dir, debug=False, ext=(".jpg", ".png")):
    self.testImgFolder = input_dir
    self.classifier_dir = classifier_dir
    self.fsh = FileSystemHelper(input_dir, ext)
    self.classifier = Classifier(classifier_dir, debug)
    self.classifier.train()
    self.debug = debug
def NN():
    ratio = float(request.args.get('ratio'))
    ratio = (ratio * .1) + .2
    j = int(request.args.get('j'))
    print(ratio, j)
    cf = Classifier(ratio)
    score = cf.GetFScore(j)
    response = flask.jsonify({'fscore': float(score) * 100})
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response
def main():
    try:
        trainingData, tuningData, testData, priorSpam = buildDataSets()
        nbc = Classifier(priorSpam, COUNT_THRESHOLD, SMOOTHING_FACTOR, DEFAULT_PROBABILITY)
        # nbc = Classifier2(priorSpam, 0, .01, None)
        nbc.train(trainingData)
        nbc.classify(testData)
        report(testData)
    except Exception as e:
        print e
        return 5
def run(procId, procCount):
    connection = PgSQL.connect(user="******", database=DatabaseName)
    memDb = redis.Redis(host='localhost', port=6379)
    TrainDbConfig = DbBuildConfig['train']
    TestDbConfig = DbBuildConfig['test']
    trainDocDb = DocumentsDatabase(connection,
                                   TrainDbConfig['DocTagsTable'],
                                   TrainDbConfig['RawDocTable'],
                                   TrainDbConfig['TagsTable'],
                                   TrainDbConfig['DocumentsTable'])
    testDocDb = DocumentsDatabase(connection,
                                  TestDbConfig['DocTagsTable'],
                                  TestDbConfig['RawDocTable'],
                                  TestDbConfig['TagsTable'],
                                  TestDbConfig['DocumentsTable'])
    trainFeatureDb = FeatureDatabase(connection, memDb, trainDocDb,
                                     TrainDbConfig['FeaturesTable'],
                                     TrainDbConfig['DocFeaturesTable'],
                                     TrainDbConfig['TagSpecificFeatureTable'])
    testFeatureDb = FeatureDatabase(connection, memDb, testDocDb,
                                    TestDbConfig['FeaturesTable'],
                                    TestDbConfig['DocFeaturesTable'],
                                    TestDbConfig['TagSpecificFeatureTable'])
    classifier = Classifier(connection, trainFeatureDb, testFeatureDb,
                            ClassifierTableConfig['predictedTrain'],
                            ClassifierTableConfig['predictedTest'],
                            trainDocDb)
    # if procId == 0:
    #     classifier.createTables()
    #     classifier.createTagPredictTables()
    #     classifier.cleanClassificationTables()
    tags = trainDocDb.getTagsList()
    count = 0
    for tag in tags:
        count = count + 1
        if count % procCount != procId:
            continue
        if count < 9000:
            continue
        print "Processing ", tag, " ", count
        c1 = trainDocDb.getTagCount(tag)
        if c1 <= 23:
            continue
        classifier.predictForTag(tag)
def captureFrameforAnalysis(self):
    try:
        self.mydatasetlist.get(self.mydatasetlist.curselection())
        datasetName = self.mydatasetlist.get(self.mydatasetlist.curselection())
        dataset_path = "binData/" + datasetName + ".npz"
        print dataset_path
        img = cv2.cvtColor(self.current_frame, cv2.COLOR_BGR2RGB)
        cv2.namedWindow("CurrentFrame", cv2.WINDOW_NORMAL)
        cv2.imshow("CurrentFrame", img)
        cl = Classifier(img)
        cl.classifieSample(dataset_path)
    except:
        tkMessageBox.showerror("Error", "Please pick a Dataset")
class Classifier_controller:
    def __init__(self):
        self.tf_idf = self.create_tf_idf()
        self.df_list = self.create_df_list()
        self.classes = self.create_classes()
        self.classifier = Classifier()

    def create_tf_idf(self):
        tf_idf = []
        os.system("pwd")
        path1 = './tutorial/data/tf_idf'
        classes = os.listdir(path1)
        for each_class in classes:
            path2 = path1 + '/' + each_class
            files = os.listdir(path2)
            for each_file in files:
                path3 = path2 + '/' + each_file
                vector = dict()
                f = open(path3)
                dimes = f.readlines()
                f.close()
                i = 1
                for dime in dimes:
                    if float(dime) != 0.0:
                        vector[i] = float(dime)
                    i += 1
                tf_idf.append((int(each_class), vector.items()))
        print "creating class sample_vector...\n"
        print "finished..."
        return tf_idf

    def create_df_list(self):
        df_list = []
        f1 = open('./tutorial/data/df.dat')
        f2 = open('./tutorial/data/attribute.dat')
        df_records = f1.readlines()
        att_records = f2.readlines()
        f1.close()
        f2.close()
        i = 0
        for df in df_records:
            attribute = att_records[i].strip('\n')
            i += 1
            df_list.append((attribute, int(df)))
            print "reading %s %d\n" % (attribute, int(df))
        print "finished..."
        return df_list

    def create_classes(self):
        classes = []
        f = open('./tutorial/data/classes.dat')
        for each in f.readlines():
            classes.append(each.strip('\n'))
        f.close()
        return classes

    def get_classes(self, text, k):
        i = self.classifier.fun(text, self.df_list, len(self.tf_idf),
                                self.tf_idf, k)
        return self.classes[i]
def apply_mutation(cl: Classifier, perception: list):
    """
    :type cl: Classifier
    :type perception: list
    """
    for i in range(len(cl.condition)):
        if rd.random() < cons.nu:
            if cl.condition[i] == cons.dontCare:
                cl.condition[i] = perception[i]
            else:
                cl.condition[i] = cons.dontCare
    if rd.random() < cons.nu:
        c = rd.choice([i for i in range(0, cons.nbAction - 1)])
        cl.action = c
def gen_match_set(pop: list, percept: list):
    """
    Generate the list of Classifiers that match the current perception.

    :param pop: population of classifiers
    :type pop: list
    :param percept: current perception
    :type percept: list
    :return: match set
    :rtype: list
    """
    ma = []
    if time == 0 or len(pop) == 0:
        # Bootstrap the population with one fully general classifier per action.
        for i in range(cons.nbAction):
            newcl = Classifier()
            newcl.condition = [cons.symbol] * cons.lenCondition
            newcl.action = i
            newcl.effect = [cons.symbol] * cons.lenCondition
            newcl.exp = 0
            newcl.t = time
            newcl.q = 0.5
            pop.append(newcl)
    for c in pop:
        if does_match(c, percept):
            ma.append(c)
    return ma
def main(sc):
    start = timer()

    # 1) Retrieve the products from the database
    # categs = ["Computers & Tablets", "Video Games", "TV & Home Theater"]
    stpwrds = stopwords.words('portuguese')
    products = findProductsByCategory([])
    print '####### Creating product rdd with {} product'.format(len(products))
    productRDD = sc.parallelize(products)
    # productRDD, discardedProductRDD = entiryProductRDD.randomSplit([2, 8], seed=0L)

    # 2) Build the document corpus
    corpusRDD = (productRDD
                 .map(lambda s: (s[0], word_tokenize(s[1].lower()), s[2], s[3]))
                 .map(lambda s: (s[0], [PorterStemmer().stem(x) for x in s[1]
                                        if x not in stpwrds], s[2], s[3]))
                 .map(lambda s: (s[0], [x[0] for x in pos_tag(s[1])
                                        if x[1] == 'NN' or x[1] == 'NNP'], s[2], s[3]))
                 .cache())

    idfsRDD = idfs(corpusRDD)
    idfsRDDBroadcast = sc.broadcast(idfsRDD.collectAsMap())
    tfidfRDD = corpusRDD.map(lambda x: (x[0], tfidf(x[1], idfsRDDBroadcast.value),
                                        x[2], x[3]))

    category = productRDD.map(lambda x: x[2]).distinct().collect()
    categoryAndSubcategory = productRDD.map(lambda x: (x[2], x[3])).distinct().collect()
    tokens = corpusRDD.flatMap(lambda x: x[1]).distinct().collect()
    insertTokensAndCategories(tokens, category, categoryAndSubcategory)

    classifier = Classifier(sc, 'NaiveBayes')
    trainingVectSpaceCategoryRDD, testVectSpaceCategoryRDD = classifier.createVectSpaceCategory(
        tfidfRDD, category, tokens).randomSplit([8, 2], seed=0L)
    modelNaiveBayesCategory = classifier.trainModel(
        trainingVectSpaceCategoryRDD, '/dados/models/naivebayes/category_new')

    predictionAndLabelCategoryRDD = testVectSpaceCategoryRDD.map(
        lambda p: (category[int(modelNaiveBayesCategory.predict(p.features))],
                   category[int(p.label)]))
    acuraccyCategory = float(predictionAndLabelCategoryRDD.filter(
        lambda (x, v): x[0] == v[0]).count()) / float(predictionAndLabelCategoryRDD.count())
    print 'the accuracy of the Category Naive Bayes model is %f' % acuraccyCategory

    trainingVectSpaceSubcategory, testVectSpaceSubcategory = classifier.createVectSpaceSubcategory(
        tfidfRDD, categoryAndSubcategory, tokens).randomSplit([8, 2], seed=0L)
    modelNaiveBayesSubcategory = classifier.trainModel(
        trainingVectSpaceSubcategory, '/dados/models/naivebayes/subcategory_new')

    predictionAndLabelSubcategory = testVectSpaceSubcategory.map(
        lambda p: (categoryAndSubcategory[int(modelNaiveBayesSubcategory.predict(p.features))],
                   categoryAndSubcategory[int(p.label)]))
    acuraccySubcategory = float(predictionAndLabelSubcategory.filter(
        lambda (x, v): x[0] == v[0]).count()) / float(predictionAndLabelSubcategory.count())
    print 'the accuracy of the Subcategory Naive Bayes model is %f' % acuraccySubcategory

    # test with DecisionTree model
    classifierDT = Classifier(sc, 'DecisionTree')
    trainingVectSpaceCategory, testVectSpaceCategory = classifierDT.createVectSpaceCategory(
        tfidfRDD, category, tokens).randomSplit([8, 2], seed=0L)
    modelDecisionTreeCategory = classifierDT.trainModel(
        trainingVectSpaceCategory, '/dados/models/dt/category_new')

    predictions = modelDecisionTreeCategory.predict(
        testVectSpaceCategory.map(lambda x: x.features))
    predictionAndLabelCategory = testVectSpaceCategory.map(lambda lp: lp.label).zip(predictions)
    acuraccyDecisionTree = float(predictionAndLabelCategory.filter(
        lambda (x, v): x == v).count()) / float(predictionAndLabelCategory.count())
    print 'the accuracy of the Decision Tree model is %f' % acuraccyDecisionTree

    elap = timer() - start
    print 'it took %d seconds' % elap
def validateClassifier():
    cl = Classifier(dataset="binData/classificationTrainingPalmahim100.npz",
                    regression=False)
    path_list = ["../data/training_classification/positive",
                 "../data/training_classification/negative"]
    kmeans_path = 'binData/KmeansBlobsPalmahim100.pkl'
    # NOTE: `kmeans` is used below but never defined in this function;
    # it is presumably loaded elsewhere from kmeans_path.
    # cl.classificationValidation(path_list, kmeans, kernel='linear', gamma=None, C=1)
    Cs = [0.001, 0.002, 0.003, 0.004]
    gammas = [0.1]
    kernels = ["rbf", "linear"]
    for kernel in kernels:
        if kernel == 'linear':
            gamma = None
            for C in Cs:
                cl.classificationValidation(path_list, kmeans, kernel=kernel,
                                            gamma=gamma, C=C)
        else:
            for gamma in gammas:
                for C in Cs:
                    cl.classificationValidation(path_list, kmeans, kernel=kernel,
                                                gamma=gamma, C=C)
    cv.waitKey()
def confirmPush(self):
    limbList = []
    for p in self.selection:
        if self.selection[p] == 1:
            limbList.append(p)
    self.pbar.setValue(0)
    homedir = os.getcwd()
    filt = Filter(homedir)
    filt.dataProcess()
    self.pbar.setValue(25)
    select = RandomSelector(homedir)
    select.dataProcess()
    self.pbar.setValue(50)
    st = StaticAnalyzer(homedir, limbList)
    st.dataProcess()
    self.pbar.setValue(75)
    c = Classifier(homedir)
    count, rate, total, result = c.staticClassify()
    self.pbar.setValue(100)
    reply = QtGui.QMessageBox.question(
        self, 'Static Analysis Result',
        "Total number is %d" % total +
        "\nCorrect number is %d" % count +
        "\nCorrect rate is %f" % (100 * rate) + "%",
        QtGui.QMessageBox.Yes)
def __init__(self, files, chanNum):
    self.signal = []
    self.stimulusCode = []
    self.phaseInSequence = []
    self.targetLetters = []
    self.firsttrain = 1
    self.cl = Classifier()
    self.sf = SpatialFilter(chanNum)
    self.rate = 0
    self.files = files
    self.chanNum = chanNum
def update_application_average(cli: Classifier, t: int):
    """
    Update the classifier's application-average estimate (aav).

    :type cli: Classifier
    :type t: int
    :param t: current time
    """
    if cli.exp < 1 / cons.beta:
        # Not enough experience yet: keep the exact running mean of the
        # gaps between application times.
        cli.aav += (t - cli.tga - cli.aav) / cli.exp
    else:
        # Enough experience: switch to a Widrow-Hoff style update with rate beta.
        cli.aav += cons.beta * (t - cli.tga - cli.aav)
    cli.tga = t
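# Self-contained illustration of the update above: for the first 1/beta
# applications the estimate is an exact running mean of the gaps between
# application times; afterwards it becomes an exponential moving average with
# rate beta. The Cls stub and parameter values are assumptions for the demo.
class Cls:
    def __init__(self):
        self.exp = 0      # number of applications so far
        self.aav = 0.0    # application-average estimate
        self.tga = 0      # time of last application

BETA = 0.05
cli = Cls()
for t in [3, 7, 12, 18]:          # times at which the classifier is applied
    cli.exp += 1
    if cli.exp < 1 / BETA:
        cli.aav += (t - cli.tga - cli.aav) / cli.exp   # exact mean of gaps
    else:
        cli.aav += BETA * (t - cli.tga - cli.aav)      # Widrow-Hoff update
    cli.tga = t
print(round(cli.aav, 3))   # mean of the gaps 3, 4, 5, 6 -> 4.5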
def run():
    connection = PgSQL.connect(user="******", database=DatabaseName)
    memDb = redis.Redis(host='localhost', port=6379)
    TrainDbConfig = DbBuildConfig['train']
    TestDbConfig = DbBuildConfig['test']
    trainDocDb = DocumentsDatabase(connection,
                                   TrainDbConfig['DocTagsTable'],
                                   TrainDbConfig['RawDocTable'],
                                   TrainDbConfig['TagsTable'],
                                   TrainDbConfig['DocumentsTable'])
    trainFeatureDb = FeatureDatabase(connection, memDb, trainDocDb,
                                     TrainDbConfig['FeaturesTable'],
                                     TrainDbConfig['DocFeaturesTable'],
                                     TrainDbConfig['TagSpecificFeatureTable'])
    testFeatureDb = FeatureDatabase(connection, memDb, None,
                                    TestDbConfig['FeaturesTable'],
                                    TestDbConfig['DocFeaturesTable'],
                                    TestDbConfig['TagSpecificFeatureTable'])
    classifier = Classifier(connection, trainFeatureDb, testFeatureDb,
                            ClassifierTableConfig['predictedTrain'],
                            ClassifierTableConfig['predictedTest'],
                            trainDocDb)
    # classifier.createTables()
    classifier.createTagPredictTables()
    classifier.cleanClassificationTables()
    tags = trainDocDb.getTagsList()
    s1 = 0
    s2 = 0
    for tag in tags:
        features = trainFeatureDb.getTagSpecificFeatures(tag)
        testTag = tag
        hashes = trainFeatureDb.getTagSpecificFeatures(testTag)
        if not hashes:
            continue
        c1 = trainDocDb.getTagCount(testTag)
        if c1 <= 25:
            continue
        s1 += c1
        print classifier.predictForTag(tag)
    classifier.saveClassificationResults()
def load_classifier(neighbours, blur_scale, c=None, gamma=None, verbose=0):
    classifier_file = 'classifier_%s_%s.dat' % (blur_scale, neighbours)
    classifier_path = DATA_FOLDER + classifier_file
    # Note: the original checked exists(classifier_file) but then loaded from
    # classifier_path; checking the full path is almost certainly intended.
    if exists(classifier_path):
        if verbose:
            print 'Loading classifier...'
        classifier = Classifier(filename=classifier_path,
                                neighbours=neighbours, verbose=verbose)
    elif c is not None and gamma is not None:
        if verbose:
            print 'Training new classifier...'
        classifier = Classifier(c=c, gamma=gamma, neighbours=neighbours,
                                verbose=verbose)
        learning_set = load_learning_set(neighbours, blur_scale, verbose=verbose)
        classifier.train(learning_set)
        classifier.save(classifier_path)
    else:
        raise Exception('No soft margin and gamma specified.')
    return classifier
def apply_crossover(cl1: Classifier, cl2: Classifier):
    if cl1.effect != cl2.effect:
        return
    # Draw two distinct crossover points in [0, len(condition) + 1).
    x = random() * (len(cl1.condition) + 1)
    while True:
        y = random() * (len(cl1.condition) + 1)
        if x != y:
            break
    if x > y:
        x, y = y, x
    # Swap the condition attributes that fall between the two points.
    # (The original loop's termination test, `if i <= y: break`, would have
    # exited after the first iteration; this loop keeps the intended
    # swap-between-x-and-y behavior.)
    for i in range(len(cl1.condition)):
        if x <= i < y:
            cl1.condition[i], cl2.condition[i] = cl2.condition[i], cl1.condition[i]
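# A quick runnable check of the two-point crossover repaired above, on plain
# lists instead of Classifier objects: positions i with x <= i < y are
# swapped, so the exchanged segment is contiguous and complementary between
# the two parents.
from random import random, seed

def two_point_crossover(c1, c2):
    x = random() * (len(c1) + 1)
    while True:
        y = random() * (len(c1) + 1)
        if x != y:
            break
    if x > y:
        x, y = y, x
    for i in range(len(c1)):
        if x <= i < y:
            c1[i], c2[i] = c2[i], c1[i]

seed(1)
a, b = list("AAAAAA"), list("BBBBBB")
two_point_crossover(a, b)
print("".join(a), "".join(b))   # one contiguous segment swapped between the two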
def main():
    # load training files into Classifier
    path_to_res = os.path.join(sys.path[0], "resources\\")
    neg_classif = Classifier(path_to_res + "training_negative.txt")
    neu_classif = Classifier(path_to_res + "training_neutral.txt")
    pos_classif = Classifier(path_to_res + "training_positive.txt")
    total_entries = (neu_classif.get_entries() + neg_classif.get_entries() +
                     pos_classif.get_entries())
    # load test files
    test_parser = Parser(path_to_res + "test_set.txt")
    counter = 1
    while True:
        word_list = test_parser.giveWordList()
        if len(word_list) == 0:
            break
        neg_p = neg_classif.classification_probability(word_list, total_entries)
        neu_p = neu_classif.classification_probability(word_list, total_entries)
        pos_p = pos_classif.classification_probability(word_list, total_entries)
        print("Test " + str(counter) + ":\n")
        print("\tNegative: " + str(math.fabs(neg_p)) + "%\n")
        print("\tNeutral: " + str(math.fabs(neu_p)) + "%\n")
        print("\tPositive: " + str(math.fabs(pos_p)) + "%\n")
        counter += 1
def unexpected_case(clas: Classifier, percept: list, percept_: list) -> Classifier:
    """
    :rtype: Classifier
    """
    assert len(percept_) == cons.lenCondition, "Wrong length"
    assert len(percept) == cons.lenCondition, "Wrong length"
    clas.q = clas.q - cons.beta * clas.q
    clas.mark = percept_
    for i in range(len(percept)):
        if clas.effect[i] != cons.symbol:
            if clas.effect[i] != percept_[i] or percept_[i] != percept[i]:
                return None
    child = Classifier(clas)
    for i in range(len(percept)):
        if clas.effect[i] == cons.symbol and percept_[i] != percept[i]:
            child.condition[i] = percept_[i]
            child.effect[i] = percept[i]
    if clas.q < 0.5:
        clas.q = 0.5
    child.exp = 1
    return child
def apply_ga(aset: list, t: int, pop: list):
    sumNum = 0
    sumTgaN = 0
    for CL in aset:
        sumTgaN += CL.tga * CL.num
        sumNum += CL.num
    if (t - sumTgaN) / sumNum > cons.thetaGA:
        for CL in aset:
            CL.tga = t
        parent1 = select_offspring(aset)
        parent2 = select_offspring(aset)
        child1 = Classifier(parent1)
        child2 = Classifier(parent2)
        child1.num += 1
        child2.num += 1
        child1.exp += 1
        child2.exp += 1
        apply_ga_mutation(child1)
        apply_ga_mutation(child2)
        if random() < cons.x:
            apply_crossover(child1, child2)
            child1.r = (parent1.r + parent2.r) / 2
            child2.r = (parent1.r + parent2.r) / 2
            child1.q = (parent1.q + parent2.q) / 2
            child2.q = (parent1.q + parent2.q) / 2
        child1.q /= 2
        child2.q /= 2
        delete_classifier(aset, pop)
        if child1.condition != [cons.symbol] * cons.lenCondition:
            add_ga_classifier(aset, pop, child1)
        if child2.condition != [cons.symbol] * cons.lenCondition:
            add_ga_classifier(aset, pop, child2)
def __init__(self):
    self.classifier = Classifier()
def confirmPush(self):
    checked = (self.staticRecognition.isChecked() or
               self.dynamicRecognition.isChecked() or
               self.fusionRecognition.isChecked())
    if not checked:
        reply = QtGui.QMessageBox.question(self, 'Analysis Result', "Select One",
                                           QtGui.QMessageBox.Yes)
        return
    homdir = os.getcwd()
    trainGaitPath = homdir + "\\Dataset\\TrainDataset\\TrainGaitDataset"
    if os.path.exists(trainGaitPath):
        shutil.rmtree(trainGaitPath)
    os.mkdir(trainGaitPath)
    filterFilePath = homdir + "\\Dataset\\FilteredGaitDataset"
    files = os.listdir(filterFilePath)
    for f in files:
        fpath = filterFilePath + "\\" + f
        dstGaitPath = trainGaitPath + "\\" + f
        shutil.copytree(fpath, dstGaitPath)
    testGaitPath = homdir + "\\Dataset\\TestDataset\\TestGaitDataset"
    if os.path.exists(testGaitPath):
        shutil.rmtree(testGaitPath)
    os.mkdir(testGaitPath)
    # raw string: the original relied on "\U" etc. not being interpreted
    exePath = r"C:\Users\Niko\Documents\BodyBasics-D2D\Debug\BodyBasics-D2D "
    outputFilePath = homdir + "\\test.txt"
    # outputFile = open(outputFilePath, 'w')
    # outputFile.close()
    # os.system(exePath + outputFilePath)
    dstOutputPersonPath = homdir + "\\Dataset\\TestDataset\\TestGaitDataset\\Person001"
    os.mkdir(dstOutputPersonPath)
    dstOutputPath = dstOutputPersonPath + "\\1.txt"
    shutil.copy(outputFilePath, dstOutputPath)
    self.pbar.setValue(50)
    if self.staticRecognition.isChecked():
        self.pbar.setValue(75)
        st = StaticAnalyzer(homdir, limbDescriptors)
        st.dataProcess()
        c = Classifier(homdir)
        count, rate, total, result = c.staticClassify()
        self.pbar.setValue(100)
        name = self.findName(result[0])
        reply = QtGui.QMessageBox.question(self, 'Static Analysis Result',
                                           "This is " + name,
                                           QtGui.QMessageBox.Yes)
    elif self.dynamicRecognition.isChecked():
        self.pbar.setValue(75)
        dy = DynamicAnalyzer(homdir, angleDescriptors)
        dy.dataProcess()
        c = Classifier(homdir)
        count, rate, total = c.dynamicClassify()
        self.pbar.setValue(100)
        reply = QtGui.QMessageBox.question(
            self, 'Dynamic Analysis Result',
            "Total number is %d" % total + "\nCorrect number is %d" % count +
            "\nCorrect rate is %f" % (100 * rate) + "%",
            QtGui.QMessageBox.Yes)
    else:
        self.pbar.setValue(75)
        dy = DynamicAnalyzer(homdir, angleDescriptors)
        dy.dataProcess()
        st = StaticAnalyzer(homdir, limbDescriptors)
        st.dataProcess()
        c = Classifier(homdir)
        count, rate, total = c.fusionClassify()
        self.pbar.setValue(100)
        reply = QtGui.QMessageBox.question(
            self, 'Fusion Analysis Result',
            "Total number is %d" % total + "\nCorrect number is %d" % count +
            "\nCorrect rate is %f" % (100 * rate) + "%",
            QtGui.QMessageBox.Yes)
def run(nb_filters=nb_filters, poolings=poolings, nbSubwindows=nbSubwindows,
        subwindowTargetWidth=subwindowTargetWidth,
        subwindowTargetHeight=subwindowTargetHeight,
        nbJobs=nbJobs, verbosity=verbosity, tempFolder=tempFolder,
        nbTrees=nbTrees, maxFeatures=maxFeatures, maxDepth=maxDepth,
        minSamplesSplit=minSamplesSplit, minSamplesLeaf=minSamplesLeaf,
        bootstrap=bootstrap, nbJobsEstimator=nbJobsEstimator,
        verbose=verbose, learningUse=learningUse, testingUse=testingUse):
    lsSize = learningUse
    if learningUse > maxLearningSize:
        lsSize = maxLearningSize
    tsSize = testingUse
    if testingUse > maxTestingSize:
        tsSize = maxTestingSize

    totalNbFeatures = (nb_filters * len(poolings) * subwindowTargetWidth *
                       subwindowTargetHeight * 3)
    totalNbObj = lsSize * nbSubwindows
    nbFeatures = totalNbFeatures / nbJobs
    # the original called np.zeros() without a shape, which raises TypeError
    floatSize = np.zeros(1).itemsize
    singleArraySize = nbFeatures * totalNbObj * floatSize
    totalArraySize = totalNbFeatures * totalNbObj * floatSize

    # ======INSTANTIATING========#
    os.environ["JOBLIB_TEMP_FOLDER"] = "/home/jmbegon/jmbegon/code/work/tmp/"
    # --Pixit--
    memCoord = MemroyTestCoordinator(nbFeatures, totalNbObj)
    if nbJobs != 1:
        memCoord.parallelize(nbJobs, tempFolder)
    # --Extra-trees--
    baseClassif = ExtraTreesClassifier(nbTrees,
                                       max_features=maxFeatures,
                                       max_depth=maxDepth,
                                       min_samples_split=minSamplesSplit,
                                       min_samples_leaf=minSamplesLeaf,
                                       bootstrap=bootstrap,
                                       n_jobs=nbJobsEstimator,
                                       verbose=verbose)
    # --Classifier--
    classifier = Classifier(memCoord, baseClassif)
    # --Data--
    loader = CifarFromNumpies(learningSetDir, learningIndexFile)
    learningSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    learningSet = learningSet[0:lsSize]
    loader = CifarFromNumpies(testingSetDir, testingIndexFile)
    testingSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    testingSet = testingSet[0:tsSize]

    # =====COMPUTATION=====#
    # --Learning--#
    classifier.fit(learningSet)

    print "========================================="
    print "-----------Filtering--------------"
    print "nb_filters", nb_filters
    print "----------Pooling--------------"
    print "poolings", poolings
    print "--------SW extractor----------"
    print "#Subwindows", nbSubwindows
    print "subwindowTargetWidth", subwindowTargetWidth
    print "subwindowTargetHeight", subwindowTargetHeight
    print "------------Misc-----------------"
    print "tempFolder", tempFolder
    print "verbosity", verbosity
    print "nbJobs", nbJobs
    print "--------ExtraTrees----------"
    print "nbTrees", nbTrees
    print "maxFeatures", maxFeatures
    print "maxDepth", maxDepth
    print "minSamplesSplit", minSamplesSplit
    print "minSamplesLeaf", minSamplesLeaf
    print "bootstrap", bootstrap
    print "nbJobsEstimator", nbJobsEstimator
    print "verbose", verbose
    print "------------Data---------------"
    print "LearningSet size", len(learningSet)
    print "TestingSet size", len(testingSet)
    print "-------------------------------"
    print "totalNbFeatures", totalNbFeatures
    print "totalNbObj", totalNbObj
    print "singleArraySize", singleArraySize
    print "totalArraySize", totalArraySize
def apply_ga_mutation(classifier: Classifier):
    # Generalize specified condition attributes with probability mu.
    for i in range(len(classifier.condition)):
        if classifier.condition[i] != cons.symbol:
            if random() < cons.mu:
                classifier.condition[i] = cons.symbol
def cover_triple(percept_: list, action: int, percept: list, t: int) -> Classifier:
    child = Classifier()
    for i in range(len(percept)):
        if percept_[i] != percept[i]:
            child.condition[i] = percept_[i]
            child.effect[i] = percept[i]
    child.action = action
    child.exp = 0
    child.r = 0
    child.aav = 0
    child.alp = t
    child.tga = t
    child.t = t
    child.q = 0.5
    child.num = 1
    return child
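# Runnable sketch of the covering step above: the new classifier's condition
# and effect record exactly the attributes that changed between the two
# perceptions. Plain lists and '#' as the don't-care symbol are assumptions
# standing in for the Classifier fields.
SYMBOL = '#'
percept_before = ['0', '1', '0', '1']
percept_after = ['0', '1', '1', '1']

condition = [SYMBOL] * len(percept_before)
effect = [SYMBOL] * len(percept_before)
for i, (b, a) in enumerate(zip(percept_before, percept_after)):
    if b != a:
        condition[i] = b   # what was observed before the action
        effect[i] = a      # what it changed into
print(condition, effect)   # ['#', '#', '0', '#'] ['#', '#', '1', '#']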
def run(nb_filters=nb_filters,
        filterPolicy=filterPolicy,
        poolings=poolings,
        extractor=extractor,
        nbSubwindows=nbSubwindows,
        subwindowMinSizeRatio=subwindowMinSizeRatio,
        subwindowMaxSizeRatio=subwindowMaxSizeRatio,
        subwindowTargetWidth=subwindowTargetWidth,
        subwindowTargetHeight=subwindowTargetHeight,
        fixedSize=fixedSize,
        subwindowInterpolation=subwindowInterpolation,
        includeOriginalImage=includeOriginalImage,
        random=random,
        nbJobs=nbJobs,
        verbosity=verbosity,
        tempFolder=tempFolder,
        nbTrees=nbTrees,
        maxFeatures=maxFeatures,
        maxDepth=maxDepth,
        minSamplesSplit=minSamplesSplit,
        minSamplesLeaf=minSamplesLeaf,
        bootstrap=bootstrap,
        randomClassif=randomClassif,
        nbJobsEstimator=nbJobsEstimator,
        verbose=verbose,
        learningUse=learningUse,
        testingUse=testingUse,
        saveFile=saveFile,
        shouldSave=shouldSave):
    randomState = None
    if not randomClassif:
        randomState = 100

    lsSize = learningUse
    if learningUse > maxLearningSize:
        lsSize = maxLearningSize
    tsSize = testingUse
    if testingUse > maxTestingSize:
        tsSize = maxTestingSize

    # ======INSTANTIATING========#
    # --RandConv--
    randConvCoord = coordinatorRandConvFactory(
        nbFilters=nb_filters,
        filterPolicy=filterPolicy,
        poolings=poolings,
        extractor=extractor,
        nbSubwindows=nbSubwindows,
        subwindowMinSizeRatio=subwindowMinSizeRatio,
        subwindowMaxSizeRatio=subwindowMaxSizeRatio,
        subwindowTargetWidth=subwindowTargetWidth,
        subwindowTargetHeight=subwindowTargetHeight,
        subwindowInterpolation=subwindowInterpolation,
        includeOriginalImage=includeOriginalImage,
        nbJobs=nbJobs,
        verbosity=verbosity,
        tempFolder=tempFolder,
        random=random)
    randConvCoord = LoadCoordinator(randConvCoord, learnFile, testFile)
    # --Extra-trees--
    baseClassif = ExtraTreesClassifier(nbTrees,
                                       max_features=maxFeatures,
                                       max_depth=maxDepth,
                                       min_samples_split=minSamplesSplit,
                                       min_samples_leaf=minSamplesLeaf,
                                       bootstrap=bootstrap,
                                       n_jobs=nbJobsEstimator,
                                       random_state=randomState,
                                       verbose=verbose)
    # --Classifier--
    classifier = Classifier(randConvCoord, baseClassif)
    # --Data--
    loader = CifarFromNumpies(learningSetDir, learningIndexFile)
    learningSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    learningSet = learningSet[0:lsSize]
    loader = CifarFromNumpies(testingSetDir, testingIndexFile)
    testingSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    testingSet = testingSet[0:tsSize]

    # =====COMPUTATION=====#
    # --Learning--#
    print "Starting learning"
    fitStart = time()
    classifier.fit(learningSet)
    fitEnd = time()
    print "Learning done", formatDuration(fitEnd - fitStart)
    sys.stdout.flush()

    # --Testing--#
    y_truth = testingSet.getLabels()
    predStart = time()
    y_prob, y_pred = classifier.predict_predict_proba(testingSet)
    predEnd = time()
    accuracy = classifier.accuracy(y_pred, y_truth)
    confMat = classifier.confusionMatrix(y_pred, y_truth)

    # ====ANALYSIS=====#
    importance, order = randConvCoord.importancePerFeatureGrp(baseClassif)

    print "==================RandConv================"
    print "-----------Filtering--------------"
    print "nb_filters", nb_filters
    print "filterPolicy", filterPolicy
    print "----------Pooling--------------"
    print "poolings", poolings
    print "--------SW extractor----------"
    print "#Subwindows", nbSubwindows
    print "subwindowMinSizeRatio", subwindowMinSizeRatio
    print "subwindowMaxSizeRatio", subwindowMaxSizeRatio
    print "subwindowTargetWidth", subwindowTargetWidth
    print "subwindowTargetHeight", subwindowTargetHeight
    print "fixedSize", fixedSize
    print "------------Misc-----------------"
    print "includeOriginalImage", includeOriginalImage
    print "random", random
    print "tempFolder", tempFolder
    print "verbosity", verbosity
    print "nbJobs", nbJobs
    print "--------ExtraTrees----------"
    print "nbTrees", nbTrees
    print "maxFeatures", maxFeatures
    print "maxDepth", maxDepth
    print "minSamplesSplit", minSamplesSplit
    print "minSamplesLeaf", minSamplesLeaf
    print "bootstrap", bootstrap
    print "nbJobsEstimator", nbJobsEstimator
    print "verbose", verbose
    print "randomState", randomState
    print "------------Data---------------"
    print "LearningSet size", len(learningSet)
    print "TestingSet size", len(testingSet)
    print "-------------------------------"
    if shouldSave:
        print "saveFile", saveFile
    print "Fit time", formatDuration(fitEnd - fitStart)
    print "Classification time", formatDuration(predEnd - predStart)
    print "Accuracy", accuracy

    if shouldSave:
        np.save(saveFile, y_prob)

    return accuracy, confMat, importance, order
def run(lsFile, tsFile, **kwargs):
    randomState = None
    if random:
        randomState = 100

    # ======INSTANTIATING========#
    os.environ["JOBLIB_TEMP_FOLDER"] = "/home/jmbegon/jmbegon/code/work/tmp/"
    # --Pixit--
    randConvCoord = coordinatorRandConvFactory(
        nbFilters=nb_filters,
        filterMinVal=filter_min_val,
        filterMaxVal=filter_max_val,
        filterMinSize=filterMinSize,
        filterMaxSize=filterMaxSize,
        nbSubwindows=nbSubwindows,
        subwindowMinSizeRatio=subwindowMinSizeRatio,
        subwindowMaxSizeRatio=subwindowMaxSizeRatio,
        subwindowTargetWidth=subwindowTargetWidth,
        subwindowTargetHeight=subwindowTargetHeight,
        poolings=poolings,
        filterNormalisation=filterNormalisation,
        subwindowInterpolation=subwindowInterpolation,
        includeOriginalImage=includeOriginalImage,
        nbJobs=nbJobs,
        verbosity=verbosity,
        tempFolder=tempFolder,
        random=random)
    # --Extra-trees--
    baseClassif = ExtraTreesClassifier(nbTrees,
                                       max_features=maxFeatures,
                                       max_depth=maxDepth,
                                       min_samples_split=minSamplesSplit,
                                       min_samples_leaf=minSamplesLeaf,
                                       bootstrap=bootstrap,
                                       n_jobs=nbJobsEstimator,
                                       random_state=randomState,
                                       verbose=verbose)
    # --Classifier--
    classifier = Classifier(randConvCoord, baseClassif)
    # --Data--
    # Note: the original opened these pickles with mode "wb" and passed a
    # protocol argument to pickle.load; reading requires "rb", and
    # pickle.load takes no protocol argument.
    with open(lsFile, "rb") as f:
        lsSize, Xls, yls = pickle.load(f)
    loader = CifarFromNumpies(learningSetDir, learningIndexFile)
    learningSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    learningSet = learningSet[0:lsSize]
    with open(tsFile, "rb") as f:
        tsSize, Xts, yts = pickle.load(f)
    loader = CifarFromNumpies(testingSetDir, testingIndexFile)
    testingSet = FileImageBuffer(loader.getFiles(), NumpyImageLoader())
    testingSet = testingSet[0:tsSize]

    # =====COMPUTATION=====#
    # --Learning--#
    print "Starting learning"
    fitStart = time()
    baseClassif.fit(Xls, yls)
    fitEnd = time()
    print "Learning done", (fitEnd - fitStart), "seconds (no extraction)"
    sys.stdout.flush()

    # --Testing--#
    y_truth = testingSet.getLabels()
    predStart = time()
    y_pred = classifier._predict(Xts, lsSize)
    predEnd = time()

    # ====ANALYSIS=====#
    accuracy = classifier.accuracy(y_pred, y_truth)
    confMat = classifier.confusionMatrix(y_pred, y_truth)
    importance, order = randConvCoord.importancePerFeatureGrp(baseClassif)

    print "========================================="
    print "--------ExtraTrees----------"
    print "nbTrees", nbTrees
    print "maxFeatures", maxFeatures
    print "maxDepth", maxDepth
    print "minSamplesSplit", minSamplesSplit
    print "minSamplesLeaf", minSamplesLeaf
    print "bootstrap", bootstrap
    print "nbJobsEstimator", nbJobsEstimator
    print "verbose", verbose
    print "randomState", randomState
    print "------------Data---------------"
    print "LearningSet size", len(learningSet)
    print "TestingSet size", len(testingSet)
    print "-------------------------------"
    print "Fit time (no extraction)", (fitEnd - fitStart), "seconds"
    print "Classification time (no extraction)", (predEnd - predStart), "seconds"
    print "Accuracy", accuracy

    return accuracy, confMat, importance, order
class Parser(object):
    def __init__(self):
        self.classifier = Classifier()

    def parse(self, page, url, time):
        try:
            links = []
            print "Currently parsing: " + url
            soup = BeautifulSoup(page, 'html.parser')
            data = self.classifier.classify(soup, url)
            # get links only when the page is relevant
            if data is not None:
                links = self.getRelevantUris(soup, url)
            print 'No. of links retrieved: ' + str(len(links))
            return (links, data, time)
        except:
            print "Parser: cannot parse page"
            return ([], None, time)

    # takes in an html page
    def getRelevantUris(self, page, url):
        # extract domain from url
        parsed_uri = urlparse(url)
        domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
        listOfLinks = []
        for link in page.find_all('a'):
            listOfLinks.append(link.get('href'))
        # clean up the links
        listOfLinks = self.cleanLinks(listOfLinks, domain)
        # remove duplicates
        links = list(set(listOfLinks))
        return links

    def cleanLinks(self, listOfLinks, domain):
        newLinks = []
        for link in listOfLinks:
            if self.isErroneous(link):
                pass
            elif self.isRelativeLink(link):
                concatLink = self.concatRelativeLink(domain, link)
                newLinks.append(concatLink)
            else:
                newLinks.append(link)  # absolute link
        return newLinks

    def isErroneous(self, link):
        if link is None or link.startswith('#') or link.startswith('.'):
            return True
        if 'mailto' in link or 'javascript' in link:
            return True
        return False

    def concatRelativeLink(self, domain, link):
        if link.startswith('/'):
            return domain + link[1:]  # avoid double slashes //
        else:
            return domain + link

    def isRelativeLink(self, link):
        frontUrl = link.split('?', 1)[0]
        if link.startswith('/'):
            return True
        if 'php' in frontUrl and '/' not in frontUrl:  # php?param=1&param=2
            return True
        if len(link.split('.')) == 1:  # games
            return True
        return False
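# A small runnable check of the link-cleaning convention used in Parser above,
# in the same Python 2 dialect; the example URL and link values are made up.
from urlparse import urlparse

url = 'http://example.com/dir/page.php'
parsed_uri = urlparse(url)
domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)

def concat_relative_link(domain, link):
    # avoid double slashes when the link already starts with '/'
    return domain + (link[1:] if link.startswith('/') else link)

print concat_relative_link(domain, '/games')   # http://example.com/games
print concat_relative_link(domain, 'games')    # http://example.com/games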
parser.add_argument("-q", "--queries", help="""query sequence""", dest="queries", required=False) parser.add_argument("-c", dest="C", required=False) return parser model = None if __name__ == "__main__": parser = cmdline_parser() args = parser.parse_args() gta = list(SeqIO.parse(args.gta, "fasta")) viral = list(SeqIO.parse(args.viral, "fasta")) model = Classifier(gta, viral) queries = args.queries.split(',') for query in queries: query_seqs = list(SeqIO.parse(query, "fasta")) gene_num = int(query[query.find('orfg')+4]) if not model: # dist_matrix = parse_dists.get_dist_matrix(gene_num) model = Classifier(gta, viral) model.get_training_set() # model.get_weights() SVs = model.learn_SVM_model(float(args.C)) print model.classify(query_seqs)[1]
from sklearn import cross_validation
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, \
    AdaBoostClassifier, GradientBoostingClassifier
from sklearn.grid_search import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import BernoulliNB
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier

from Classifier import Classifier
import numpy as np

if __name__ == '__main__':
    # test models
    name = 'RF_test'
    model = RandomForestClassifier(n_estimators=100, max_features='auto',
                                   max_depth=None, min_samples_split=3,
                                   bootstrap=True, oob_score=True,
                                   n_jobs=-1, verbose=True)
    min_samples_int = np.linspace(1, 10, 10)
    min_samples_frac = np.linspace(0, 0.5, 3)
    param_grid = dict(min_samples_split=min_samples_int,
                      min_samples_leaf=min_samples_int,
                      min_weight_fraction_leaf=min_samples_frac)
    model = GridSearchCV(model, param_grid=param_grid, verbose=True, n_jobs=-1)
    clf = Classifier(model, name, one_hot=False, drop_cat=False, calibration=False)
    clf.run()
def __init__(self):
    self.tf_idf = self.create_tf_idf()
    self.df_list = self.create_df_list()
    self.classes = self.create_classes()
    self.classifier = Classifier()