def extractFeatures(self, train_data, test_data):
    # Construct the feature extractor and fit the vectorizer on the training data
    fe = FeatureExtractor()
    fe.buildVectorizer(train_data, self.config['featureKwargs'])

    # Make the feature path if it doesn't exist
    if not os.path.exists(self.feature_path):
        os.mkdir(self.feature_path)

    # Check if train vectors already exist
    train_path = os.path.join(self.feature_path, 'train_vectors.npz')
    if os.path.exists(train_path):
        # If they do, load them
        train_vectors = load_npz(train_path)
    else:
        # Make the train vectors
        train_vectors = [fe.process(feature, train_data) for feature in self.config['features']]
        if len(train_vectors) > 1:
            # The vectors are saved with save_npz, so they must be scipy sparse
            # matrices; join them with sparse hstack rather than numpy.concatenate
            # (requires: from scipy.sparse import hstack, load_npz, save_npz)
            train_vectors = hstack(train_vectors)
        else:
            train_vectors = train_vectors[0]
        # Save the train vectors
        save_npz(train_path, train_vectors)

    # Check if test vectors already exist
    test_path = os.path.join(self.feature_path, 'test_vectors.npz')
    if os.path.exists(test_path):
        # If they do, load them
        test_vectors = load_npz(test_path)
    else:
        # Make the test vectors
        test_vectors = [fe.process(feature, test_data) for feature in self.config['features']]
        if len(test_vectors) > 1:
            test_vectors = hstack(test_vectors)
        else:
            test_vectors = test_vectors[0]
        # Save the test vectors
        save_npz(test_path, test_vectors)

    return train_vectors, test_vectors
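# A standalone sketch of the compute-or-load caching pattern used above, with
# scipy's sparse I/O; the helper name and the toy one-column feature blocks are
# illustrative, not taken from the original code.
import os
from scipy.sparse import csr_matrix, hstack, load_npz, save_npz

def cached_vectors(path, blocks):
    """Load sparse feature vectors from `path` if cached, else stack and save them."""
    if os.path.exists(path):
        return load_npz(path)
    vectors = hstack(blocks).tocsr() if len(blocks) > 1 else blocks[0]
    save_npz(path, vectors)
    return vectors

# e.g. two one-column feature blocks for three samples -> a 3x2 cached matrix
vecs = cached_vectors('train_vectors.npz', [csr_matrix([[1.], [0.], [2.]]),
                                            csr_matrix([[0.], [3.], [1.]])])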
def __init__(self, vehicle):
    print("Trip Starts")
    self.vehicle = vehicle
    self.timer = 0
    self.data = []
    self.featureExtractor = FeatureExtractor()
    self.tripNo = 1
def __init__(self, name, epsilon=0.05, gamma=0.8, alpha=0.2, numTraining=0):
    self.name = name
    self.cards = []  # (cardValue, cardElement)
    self.accumulatedCards = {"Fire": 0, "Water": 0, "Ice": 0}
    self.playedCard = None
    self.args = {}
    self.args['epsilon'] = epsilon
    self.args['gamma'] = gamma
    self.args['alpha'] = alpha
    self.args['numTraining'] = numTraining
    self.weights = Counter()
    # self.weights["enemy-distance-to-closest-win"] = 1.3999995454998298e-06
    # self.weights["agent-distance-to-closest-win"] = 1.299999463999758e-06
    self.weights["enemy-distance-to-closest-win"] = -4.120535635213156
    self.weights["agent-distance-to-closest-win"] = 9.586679017815417
    self.weights["agent-went-closer-to-win"] = -0.9656494587969497
    self.weights["agent-can-block-enemy-advancement"] = 15.147299275663869
    self.featExtractor = FeatureExtractor()
    self.lastState = None
    self.lastAction = None
    self.lastScore = 0
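# How weights like those above are typically used in approximate Q-learning:
# Q(s, a) is the dot product of the weight vector with the extracted features,
# and each observed transition nudges every weight along its feature value.
# A minimal sketch; the feature names match the initialiser above, but the
# getQValue/update signatures are illustrative assumptions.
from collections import Counter

def getQValue(weights, features):
    """Q(s, a) = sum_i w_i * f_i(s, a)."""
    return sum(weights[name] * value for name, value in features.items())

def update(weights, features, reward, q_sa, max_q_next, gamma=0.8, alpha=0.2):
    """Standard approximate Q-learning weight update."""
    correction = (reward + gamma * max_q_next) - q_sa
    for name, value in features.items():
        weights[name] += alpha * correction * value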
def create_generators():
    # Create the training generator
    preloader = MatrixPreLoader(dataset_directory=training_filepath,
                                patients_to_use="ALL",
                                activity_types=activities_to_load,
                                print_loading_progress=False)
    matrix_data_generator = MatrixDataGenerator(preloader,
                                                matrix_dimensions=(224, 224),
                                                rgb=True, twoD=False,
                                                add_gaussian_noise=0,
                                                zero_sensors=0, batch_size=32,
                                                grab_data_from=(0, 1),
                                                overflow="BEFORE",
                                                print_loading_progress=False)
    training_generator = FeatureExtractor(matrix_data_generator,
                                          patient_fall_filepath,
                                          weigths_filepath, test=True)

    # Create the testing generator
    preloader = MatrixPreLoader(dataset_directory=testing_filepath,
                                patients_to_use="ALL",
                                activity_types=activities_to_load,
                                print_loading_progress=False)
    matrix_data_generator = MatrixDataGenerator(preloader,
                                                matrix_dimensions=(224, 224),
                                                rgb=True, twoD=False,
                                                add_gaussian_noise=0,
                                                zero_sensors=3, batch_size=50,
                                                grab_data_from=(0, 1),
                                                overflow="BEFORE",
                                                print_loading_progress=False)
    testing_generator = FeatureExtractor(matrix_data_generator,
                                         patient_fall_filepath,
                                         weigths_filepath, test=False)

    return training_generator, testing_generator
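# A hypothetical training call for the generators above, assuming the
# FeatureExtractor wrapper yields (batch_features, batch_labels) pairs the way
# Keras generators expect; the model architecture and the feature size (2048)
# are illustrative assumptions, not from the original code.
from tensorflow import keras

training_generator, testing_generator = create_generators()
model = keras.Sequential([
    keras.Input(shape=(2048,)),
    keras.layers.Dense(128, activation="relu"),
    keras.layers.Dense(2, activation="softmax"),
])
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
model.fit(training_generator, validation_data=testing_generator, epochs=10)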
def testExtraction(self):
    featureExt = FeatureExtractor()
    agent = Player("aql agent")
    enemy = Player("greedy agent")
    gameState = GameState(agent, enemy)
    enemy.accumulatedCards["Water"] += 1
    enemy.accumulatedCards["Fire"] += 1
    features = featureExt.getFeatures(gameState, "action", agent.name)
    self.assertEqual(features["enemy-distance-to-closest-win"], 1)
    self.assertEqual(features["agent-distance-to-closest-win"], 4)
    agent.cards.append((1, "Water"))
    enemy.accumulatedCards["Fire"] -= 1
    enemy.accumulatedCards["Water"] += 1
    features = featureExt.getFeatures(gameState, "action", agent.name)
    self.assertEqual(features["agent-distance-to-closest-win"], 3)
    self.assertEqual(features["enemy-distance-to-closest-win"], 1)
def extractFeatures(self, train_data, test_data):
    # Extract features and return them as concatenated dense arrays
    fe = FeatureExtractor(self.config['features'], self.config['featurePath'],
                          self.config['featureKwargs'])
    fe.buildVectorizer(train_data)

    # Check for already-computed train features
    if path.exists(self.config['featurePath'] + "train_data.pickle"):
        with open(self.config['featurePath'] + "train_data.pickle", "rb") as file:
            train_vectors = pickle.load(file)
    else:
        train_vectors = fe.process(train_data)
        with open(self.config['featurePath'] + "train_data.pickle", "wb+") as file:
            pickle.dump(train_vectors, file)
    if len(train_vectors) > 1:
        # fe.process is assumed to return scipy sparse blocks (see .toarray()
        # below), so join with sparse hstack rather than numpy.concatenate
        # (requires: from scipy.sparse import hstack)
        train_vectors = hstack(train_vectors)
    else:
        train_vectors = train_vectors[0]

    # Check for already-computed test features
    if path.exists(self.config['featurePath'] + "test_data.pickle"):
        with open(self.config['featurePath'] + "test_data.pickle", "rb") as file:
            test_vectors = pickle.load(file)
    else:
        test_vectors = fe.process(test_data)
        with open(self.config['featurePath'] + "test_data.pickle", "wb+") as file:
            pickle.dump(test_vectors, file)
    if len(test_vectors) > 1:
        test_vectors = hstack(test_vectors)
    else:
        test_vectors = test_vectors[0]

    return train_vectors.toarray(), test_vectors.toarray()
else:
    featureSettings['polarity'] = set(args.polarityFeatures.split(','))
#elif args.featuresFile:
#    features = set(args.featuresFile.read().splitlines())
#else:
#    features = {}
#print(features)

if args.markersFile:
    featureSettings['markersFile'] = args.markersFile
    #hiddenFileName += '_markers'

print(featureSettings)
fe = FeatureExtractor(**featureSettings)

os.mkdir(args.session)
os.chdir(args.session)

if not args.dataDir:
    dataDir = '/local/nlp/chidey/social_meaning/aclImdb/'
else:
    dataDir = args.dataDir

main(dataDir, fe, '{}_{}'.format(args.session, 0), args.numtrain)

# template fields: {2} = train/validate/test file, {3} = session, {4} = iteration
learnCommandTemplate = '/local/nlp/chidey/social_meaning/yessenalina/sle_movieReviews/bin/svm_sle_learn -v 3 -c {0} -l {1} {2}_{3}_{4} hidden_vars_{3}_{4}_{2} model_{3}_{4}'.format(
    args.c, args.l, '{0}', args.session, '{1}')
# The pre-computed features can also be downloaded from http://iamai.nl/downloads/features.npy
if not isfile(featurePath):
    print("indexing images...")
    Steles = [
        join(stelePath, f) for f in listdir(stelePath)
        if isdir(join(stelePath, f))
    ]
    for stele in Steles:
        imagePaths = [
            join(stele, f) for f in listdir(stele) if isfile(join(stele, f))
        ]
        for path in imagePaths:
            image_paths.append(path)
            # the label sits in the filename between the last '_' and the extension
            labels.append(path[(path.rfind("_") + 1):path.rfind(".")])

    featureExtractor = FeatureExtractor()
    features = []
    print("computing features...")
    for idx, (batch_images, _) in enumerate(batchGenerator(image_paths, labels, batch_size)):
        print("{}/{}".format((idx + 1) * batch_size, len(labels)))
        features_ = featureExtractor.get_features(batch_images)
        features.append(features_)
    features = np.vstack(features)
    labels = np.asarray(labels)

    print("saving features...")
    np.save(featurePath, features)
    np.save(labelsPath, labels)
else:
    print("loading precomputed features and labels from {} and {}".format(
        featurePath, labelsPath))
    features = np.load(featurePath)
    labels = np.load(labelsPath)
from NeuralNetwork import NeuralNetwork
from featureExtractor import FeatureExtractor
import numpy as np
from DataLoader import DataLoader
import configure

# best results:
# (0.05, 60, 30, 30)  -> 0.5428571428571428
# (0.1, 50, 60, 30)   -> 0.5714285714285714
# (0.1, 300, 100, 30) -> 0.6

fe = FeatureExtractor("generatedData/eigenfaces.csv",
                      "generatedData/average_face.csv")
best = 0.0
for lr in range(4, 7, 1):
    for ne in range(380, 421, 5):
        for nhn in range(40, 91, 10):
            for nev in range(50, 91, 10):
                configure.setUpConfig(lr / 100, ne, nhn, nev)
                # prepare data for training:
                dl = DataLoader(configure.config_global.modeTrain)
                dl.load_all_images()
                datasetTrain = fe.generate_dataset(dl.images)
                # train the NN:
                nn = NeuralNetwork(configure.config_global.noOfEigenValues,
                                   configure.config_global.noOfHidNeur)
                nn.trainNetwork(datasetTrain)
def main():
    """Preprocesses, extracts, learns, tests."""
    # process flags
    do_retrain, do_rebuildValidation, do_test = False, False, False
    for arg in sys.argv[1:]:
        if "--retrain" in arg and "yes" in arg:
            do_retrain = True
        if "--rebuildValidation" in arg and "yes" in arg:
            do_rebuildValidation = True
        if "--test" in arg and "yes" in arg:
            do_test = True

    # preprocessing
    do = DataOrganizer()

    # __________________________________ TRAINING ________________________ #
    # use BoG to convert to frequency vector
    fe = FeatureExtractor(FeatureExtractor.ModelType.BagOfClusters)
    clf = None
    # get the latest trained model
    filenames = os.listdir("models/")
    if len(filenames) > 0:
        clf_file = "models/" + filenames[-1]
    else:
        clf_file = None

    # get sets of tweets as training data
    # trainData0, trainData1, validation0, validation1 \
    #     = do.organizeTrainWithValidation("data/trainValidate/", do_rebuildValidation)
    trainData0, trainData1 = do.organizeTrain("data/train/")

    if do_retrain or not clf_file:
        # split training set into validation and training set
        X0, X1 = fe.extractTrainFeatureVectors((trainData0, trainData1))
        clf = learn(X0, X1)
        millis = int(round(time.time() * 1000))
        clf_file = "trainedModel" + str(millis)
        print("Saving model to file...")
        joblib.dump(clf, "models/" + clf_file, compress=1)
    else:
        print("Using trained model and BoG...")
        fe.bog = BagOfWords()
        fe.bog.getLatestBoG()
        clf = joblib.load(clf_file)

    # we're either validating or testing based on the passed flag
    # ____________________________________ VALIDATION __________________________ #
    if not do_test:
        # feed in the validation sets as one set
        validationData = do.organizeTest("data/validation/")
        validationFeatures, validationLabels = fe.extractTestFeatureVectors(validationData)
        test("Validation", clf, validationFeatures, validationLabels)
    else:
        # ____________________________________ TESTING _______________________ #
        # extract test features and test
        print("Using testing")
        testData, testLabels = do.organizeTest("data/test/")
        testFeatures = fe.extractTestFeatureVectors(testData)
        test("Testing, Global Protests With Background Subtraction", clf,
             testFeatures, testLabels)
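# The hand-rolled flag scan above could equally use argparse; a minimal
# equivalent sketch (flag names preserved, "yes"/"no" values as in the original):
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--retrain", choices=["yes", "no"], default="no")
parser.add_argument("--rebuildValidation", choices=["yes", "no"], default="no")
parser.add_argument("--test", choices=["yes", "no"], default="no")
opts = parser.parse_args()
do_retrain = opts.retrain == "yes"
do_rebuildValidation = opts.rebuildValidation == "yes"
do_test = opts.test == "yes"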
def registrate(drone_img_ori, pcl_img_ori, mask_image, args):
    common_args = {
        'pcl_mask': args.pcl_mask,
        'drone_mask': args.drone_mask,
        'save_masked_pcl': args.save_masked_pcl,
        'save_masked_drone': args.save_masked_drone,
        'save_keypoints': args.save_keypoints,
        'save_csv': args.save_csv,
        'save_matching': args.save_matching
    }
    result = {}

    # Preprocess images
    img_preprocessor = Preprocessor(drone_img_ori, pcl_img_ori, mask_image)
    img_preprocessor.preprocessing()
    imgs = img_preprocessor.get_processed_imgs()
    processed_drone_img = imgs['processed_drone_img']
    processed_pcl_img = imgs['processed_pcl_img']
    processed_drone_mask = imgs['processed_drone_mask']
    processed_pcl_mask = imgs['processed_pcl_mask']
    masked_drone_img = imgs['masked_drone_img']
    masked_pcl_img = imgs['masked_pcl_img']

    if common_args['save_masked_pcl']:
        result.update({'masked_pcl': masked_pcl_img})
    if common_args['save_masked_drone']:
        result.update({'masked_drone': masked_drone_img})

    # Extract features
    drone_feature_extractor = FeatureExtractor(processed_drone_img, "SIFT", args)
    pcl_feature_extractor = FeatureExtractor(processed_pcl_img, "SIFT", args)

    if common_args['pcl_mask']:
        print("pcl_mask: True")
        pcl_feature_extractor.compute(mask=processed_pcl_mask)
    else:
        print("No pcl_mask")
        pcl_feature_extractor.compute(mask=None)

    if common_args['drone_mask']:
        print("drone_mask: True")
        drone_feature_extractor.compute(mask=processed_drone_mask)
    else:
        print('No drone_mask')
        drone_feature_extractor.compute(mask=None)

    drone_features, drone_descs = drone_feature_extractor.get_features_and_descriptors()
    pcl_features, pcl_descs = pcl_feature_extractor.get_features_and_descriptors()

    if common_args['save_keypoints']:
        keypoints_lidar = cv2.drawKeypoints(
            pcl_img_ori, pcl_features, outImage=np.array([]), color=(0, 0, 255),
            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
        keypoints_drone = cv2.drawKeypoints(
            drone_img_ori, drone_features, outImage=np.array([]), color=(0, 0, 255),
            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
        result.update({'keypoints_lidar_image': keypoints_lidar})
        result.update({'keypoints_drone_image': keypoints_drone})

    # Find matches
    matcher = Matcher(drone_features, drone_descs, pcl_features, pcl_descs, args)
    matcher.extract_match()
    raw_matches = matcher.get_matchs()
    good_matches = matcher.get_good_matchs()

    # Find homography
    homography, status = find_homography(drone_features, pcl_features, good_matches, args)

    if common_args['save_csv']:
        result.update({'drone_total_keypoints': len(drone_features)})
        result.update({'pcl_total_keypoints': len(pcl_features)})
        result.update({'num_inliers': (status.ravel().astype(int) == 1).sum()})
        result.update({'num_raw_matches': len(raw_matches)})
        result.update({'num_good_matches': len(good_matches)})
        result.update({'homography': homography})

    if common_args['save_matching']:
        matching1 = matcher.draw_matches(processed_drone_img, processed_pcl_img, status, homography)
        matching2 = matcher.draw_matches(processed_drone_img, processed_pcl_img, None, homography)
        matching3 = matcher.draw_matches(processed_drone_img, processed_pcl_img)
        result.update({'matching1': matching1})
        result.update({'matching2': matching2})
        result.update({'matching3': matching3})

    # Warp the drone image into the point-cloud frame and blend the two
    registrated_image = cv2.warpPerspective(
        drone_img_ori, homography,
        (processed_pcl_img.shape[1], processed_pcl_img.shape[0]))
    ret_image = cv2.add(registrated_image,
                        cv2.cvtColor(processed_pcl_img, cv2.COLOR_GRAY2BGR))
    result.update({'image': ret_image})

    return result
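# The find_homography helper is external to this snippet; below is a plausible
# minimal sketch of what it might do with OpenCV, assuming the matches are
# cv2.DMatch objects (queryIdx into the drone keypoints, trainIdx into the pcl
# keypoints). The reprojection threshold is an illustrative choice, not `args`.
import cv2
import numpy as np

def find_homography_sketch(drone_kps, pcl_kps, matches, ransac_thresh=5.0):
    src_pts = np.float32([drone_kps[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
    dst_pts = np.float32([pcl_kps[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)
    # status marks RANSAC inliers (1) vs outliers (0), as counted for num_inliers above
    homography, status = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, ransac_thresh)
    return homography, status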
    avg = float(sum(relavant_vals)) / len(points)
    if avg > 0:
        res[dimension] = avg
    return res

examples = []
laterexamples = []
vectorX = []
with open("Period 5 Rand.csv", newline='') as file_reader:
    reader = csv.reader(file_reader, delimiter=",")
    counter = 0
    for line in reader:
        if counter == 5000:
            break  # cap the rows to keep runtime manageable
        counter += 1
        vectorX.append(line[1])
        laterObj = (line[0], FeatureExtractor(line[1]).featureVector())
        examples.append(laterObj[1])
        laterexamples.append(laterObj)

extractor = sklearn.feature_extraction.text.CountVectorizer(
    input='content', ngram_range=(2, 3), max_df=.7, stop_words='english')
X = extractor.fit_transform(vectorX)

distArr = []
inerArr = []
for k in range(1, 20):
    clusterer = sklearn.cluster.KMeans(n_clusters=k)
    res = clusterer.fit(X)
    print("Inertia with %d clusters is %d" % (k, clusterer.inertia_))
    inerArr.append(clusterer.inertia_)
# build the dict-style clusters so the same distortion function can be used
# consistently across them
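# A small follow-up sketch: plotting the inertias collected in inerArr gives
# the usual elbow curve for choosing k (matplotlib assumed available; it is
# not imported in the original snippet).
import matplotlib.pyplot as plt

plt.plot(range(1, 20), inerArr, marker="o")
plt.xlabel("number of clusters k")
plt.ylabel("inertia")
plt.title("Elbow curve for KMeans")
plt.show()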
def __init__(self, corpdb=fwc.DEF_CORPDB, corptable=fwc.DEF_CORPTABLE,
             correl_field=fwc.DEF_CORREL_FIELD, mysql_host="localhost",
             message_field=fwc.DEF_MESSAGE_FIELD, messageid_field=fwc.DEF_MESSAGEID_FIELD,
             encoding=fwc.DEF_ENCODING, use_unicode=fwc.DEF_UNICODE_SWITCH,
             lexicondb=fwc.DEF_LEXICON_DB, featureTable=fwc.DEF_FEAT_TABLE,
             featNames=fwc.DEF_FEAT_NAMES, date_field=fwc.DEF_DATE_FIELD,
             outcome_table=fwc.DEF_OUTCOME_TABLE,
             outcome_value_fields=[fwc.DEF_OUTCOME_FIELD],
             outcome_controls=fwc.DEF_OUTCOME_CONTROLS,
             outcome_interaction=fwc.DEF_OUTCOME_CONTROLS,
             group_freq_thresh=None, featureMappingTable='', featureMappingLex='',
             output_name='', wordTable=None, model=fwc.DEF_MODEL,
             feature_selection='', feature_selection_string='', init=None):
    if feature_selection_string or feature_selection:
        RegressionPredictor.featureSelectionString = \
            feature_selection if feature_selection else feature_selection_string
    if init:
        self.fw = FeatureWorker(corpdb, corptable, correl_field, mysql_host,
                                message_field, messageid_field, encoding, use_unicode,
                                lexicondb, date_field, wordTable) if 'fw' in init else None
        self.fg = FeatureGetter(corpdb, corptable, correl_field, mysql_host,
                                message_field, messageid_field, encoding, use_unicode,
                                lexicondb, featureTable, featNames, wordTable) if 'fg' in init else None
        self.fe = FeatureExtractor(corpdb, corptable, correl_field, mysql_host,
                                   message_field, messageid_field, encoding, use_unicode,
                                   lexicondb, wordTable=wordTable) if 'fe' in init else None
        self.fr = FeatureRefiner(corpdb, corptable, correl_field, mysql_host,
                                 message_field, messageid_field, encoding, use_unicode,
                                 lexicondb, featureTable, featNames, wordTable) if 'fr' in init else None
        self.og = OutcomeGetter(corpdb, corptable, correl_field, mysql_host,
                                message_field, messageid_field, encoding, use_unicode,
                                lexicondb, outcome_table, outcome_value_fields,
                                outcome_controls, outcome_interaction, group_freq_thresh,
                                featureMappingTable, featureMappingLex, wordTable) if 'og' in init else None
        self.oa = OutcomeAnalyzer(corpdb, corptable, correl_field, mysql_host,
                                  message_field, messageid_field, encoding, use_unicode,
                                  lexicondb, outcome_table, outcome_value_fields,
                                  outcome_controls, outcome_interaction, group_freq_thresh,
                                  featureMappingTable, featureMappingLex, output_name,
                                  wordTable) if 'oa' in init else None
        self.rp = RegressionPredictor(self.og, self.fg, model) if 'rp' in init else None
        self.cp = ClassifyPredictor(self.og, self.fg, model) if 'cp' in init else None
    else:
        self.fw = FeatureWorker(corpdb, corptable, correl_field, mysql_host,
                                message_field, messageid_field, encoding, use_unicode,
                                lexicondb, date_field, wordTable)
        self.fg = FeatureGetter(corpdb, corptable, correl_field, mysql_host,
                                message_field, messageid_field, encoding, use_unicode,
                                lexicondb, featureTable, featNames, wordTable)
        self.fe = FeatureExtractor(corpdb, corptable, correl_field, mysql_host,
                                   message_field, messageid_field, encoding, use_unicode,
                                   lexicondb, wordTable=wordTable)
        self.fr = FeatureRefiner(corpdb, corptable, correl_field, mysql_host,
                                 message_field, messageid_field, encoding, use_unicode,
                                 lexicondb, featureTable, featNames, wordTable)
        self.og = OutcomeGetter(corpdb, corptable, correl_field, mysql_host,
                                message_field, messageid_field, encoding, use_unicode,
                                lexicondb, outcome_table, outcome_value_fields,
                                outcome_controls, outcome_interaction, group_freq_thresh,
                                featureMappingTable, featureMappingLex, wordTable)
        self.oa = OutcomeAnalyzer(corpdb, corptable, correl_field, mysql_host,
                                  message_field, messageid_field, encoding, use_unicode,
                                  lexicondb, outcome_table, outcome_value_fields,
                                  outcome_controls, outcome_interaction, group_freq_thresh,
                                  featureMappingTable, featureMappingLex, output_name,
                                  wordTable)
        self.rp = RegressionPredictor(self.og, self.fg, model)
        self.cp = ClassifyPredictor(self.og, self.fg, model)

    self.allFW = {
        "FeatureWorker": self.fw,
        "FeatureGetter": self.fg,
        "FeatureExtractor": self.fe,
        "FeatureRefiner": self.fr,
        "OutcomeGetter": self.og,
        "OutcomeAnalyzer": self.oa,
        "RegressionPredictor": self.rp,
        "ClassifyPredictor": self.cp,
    }
def createDataset(self, pathRaw, pathLabels, pathVectors, pathCorrespondence):
    print("Loading spacy")
    nlp = spacy.load('en_core_web_md')
    print("loaded")
    labelIdx = 0
    #minWords = 9999
    #maxWords = 0
    #avgWords = 0
    listFiles = os.listdir(pathRaw)
    nTexts = len(listFiles)
    fdOutLabels = open(pathLabels, "w")
    fdOutCorrespondence = open(pathCorrespondence, "w")
    for i, fname in enumerate(listFiles):
        label = fname.split("_")[1]
        if label not in self.labelDict:
            self.labelDict[label] = labelIdx
            labelIdx += 1
        numeric_label = self.labelDict[label]
        fdOutLabels.write(str(numeric_label) + "\n")
        fd = open(pathRaw + fname, "r")
        raw = fd.read()
        iF = FeatureExtractor(raw, nlp)
        # FIRST SENTENCE ONLY for now
        instanceVectors = []
        for wordDict in iF.features[0]:
            instanceVectors.append(wordDict["vector"])
            # only include maxLen vectors
            if len(instanceVectors) == self.maxLen:
                break
        # zero-pad shorter sequences up to maxLen
        while len(instanceVectors) < self.maxLen:
            instanceVectors.append(list(np.zeros(268)))
        '''
        nWords = len(instanceVectors)
        if nWords > maxWords:
            maxWords = nWords
        if nWords < minWords:
            minWords = nWords
        avgWords += nWords
        '''
        self.dataset.append(instanceVectors)
        fd.close()
        print(i, "of", nTexts)
    fdOutLabels.close()
    self.dataset = np.array(self.dataset)
    # save feature vectors per text
    np.save(pathVectors, self.dataset)
    fdOutCorrespondence.write(str(self.labelDict))
    fdOutCorrespondence.close()
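# A standalone restatement of the fixed-length padding used above: keep at
# most max_len word vectors per text and zero-pad shorter ones. The helper
# name is illustrative; dim=268 matches the np.zeros(268) in createDataset.
import numpy as np

def pad_vectors(vectors, max_len, dim=268):
    vectors = list(vectors)[:max_len]
    while len(vectors) < max_len:
        vectors.append(list(np.zeros(dim)))
    return vectors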
def model_training(self):
    # check if the feature file is present; if so, there is no need to recompute the features
    # The pre-computed features can also be downloaded from http://iamai.nl/downloads/features.npy
    if not isfile(self.featurePath):
        print("indexing images...")
        Steles = [
            join(self.stelePath, f) for f in listdir(self.stelePath)
            if isdir(join(self.stelePath, f))
        ]
        for stele in Steles:
            imagePaths = [
                join(stele, f) for f in listdir(stele)
                if isfile(join(stele, f))
            ]
            for path in imagePaths:
                self.image_paths.append(path)
                self.labels.append(path[(path.rfind("_") + 1):path.rfind(".")])

        featureExtractor = FeatureExtractor()
        features = []
        print("computing features...")
        for idx, (batch_images, _) in enumerate(
                batchGenerator(self.image_paths, self.labels, self.batch_size)):
            print("{}/{}".format((idx + 1) * self.batch_size, len(self.labels)))
            features_ = featureExtractor.get_features(batch_images)
            features.append(features_)
        features = np.vstack(features)
        labels = np.asarray(self.labels)

        print("saving features...")
        np.save(self.featurePath, features)
        np.save(self.labelsPath, labels)
    else:
        print("loading precomputed features and labels from {} and {}".format(
            self.featurePath, self.labelsPath))
        features = np.load(self.featurePath)
        labels = np.load(self.labelsPath)

    # on to the SVM training phase
    # Remove the UNKNOWN class from the database
    tobeDeleted = np.nonzero(labels == "UNKNOWN")
    features = np.delete(features, tobeDeleted, 0)
    labels = np.delete(labels, tobeDeleted, 0)
    numImages = len(labels)
    trainSet, testSet, trainLabels, testLabels = train_test_split(
        features, labels, test_size=0.20, random_state=42)

    # Training the classifier. Feel free to use a linear SVM (or another classifier,
    # for that matter) for faster training; however, that will not give the
    # confidence scores that can be used to rank hieroglyphs.
    print("training SVM...")
    if 0:
        # optional: either train one classifier fast, or search through the parameter
        # space by training multiple classifiers to squeeze out that extra 2%
        clf = linear_model.LogisticRegression(C=10000)
    else:
        svr = linear_model.LogisticRegression()
        parameters = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]}
        clf = GridSearchCV(svr, parameters, n_jobs=8)
    clf.fit(trainSet, trainLabels)
    print(clf)
    print("finished training! saving...")
    joblib.dump(clf, self.svmPath, compress=1)

    prediction = clf.predict(testSet)
    accuracy = np.sum(testLabels == prediction) / float(len(prediction))
    # for idx, pred in enumerate(prediction):
    #     print("%-5s --> %s" % (testLabels[idx], pred))
    print("accuracy = {}%".format(accuracy * 100))
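# Follow-up sketch: LogisticRegression exposes predict_proba, which is what
# makes the hieroglyph ranking mentioned above possible. The model path and
# test matrix below are placeholders for self.svmPath / testSet from the
# method above, and top_k is an illustrative choice.
import joblib
import numpy as np

clf = joblib.load("svm_model.pkl")   # i.e. self.svmPath, after model_training has run
probs = clf.predict_proba(testSet)   # shape: (n_images, n_classes)
top_k = 5
for row in probs[:3]:                # top-5 candidate labels for the first 3 images
    ranked = np.argsort(row)[::-1][:top_k]
    print([(clf.classes_[j], round(float(row[j]), 3)) for j in ranked])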
def __init__(self, n):
    self.data = DataManager('../data/train.csv', '../data/test.csv', n)
    self.fe = FeatureExtractor(self.data)
    self.eval = Evaluate()