from sklearn.naive_bayes import GaussianNB

# Assumption: FE is the project's feature-extraction module,
# e.g. `import Feature_Extractor as FE`.


def test(csv_file_name, new_feature):
    records = FE.extract_csv(csv_file_name, delimiter=',')
    records = FE.shuffle_record(records)

    # preprocess records
    data = []
    label_target = []
    num_target = []
    group_label = []
    group_count = {}
    target_dictionary = {}
    group_index = 0
    features = []
    for i in range(len(records)):
        record = records[i]
        if i == 0:
            # first row holds the feature names
            features = record[:-1]
        else:
            data.append(record[:-1])
            label_target.append(record[-1])
            if record[-1] not in target_dictionary:
                target_dictionary[record[-1]] = group_index
                group_count[record[-1]] = 0
                group_label.append(record[-1])
                group_index += 1
            group_count[record[-1]] += 1
            num_target.append(target_dictionary[record[-1]])
    group_label.sort()

    # calculate projection and prediction
    projection = FE.get_projection(new_feature, features, data)
    projection_data = []
    for projection_value in projection:
        projection_data.append([projection_value])
    target = num_target
    classifier = GaussianNB()
    prediction = classifier.fit(projection_data, target).predict(projection_data)

    # initiate true_count, false_count and accuracy
    true_count = dict(target_dictionary)
    false_count = dict(target_dictionary)
    accuracy = dict(target_dictionary)

    # reverse target dictionary (numeric index -> original label)
    reverse_target_dictionary = {}
    for key in target_dictionary:
        val = target_dictionary[key]
        reverse_target_dictionary[val] = key
    for key in true_count:
        true_count[key] = 0.0
        false_count[key] = 0.0
        accuracy[key] = 0.0

    # calculate true_count and false_count
    for i in range(len(target)):
        target_value = target[i]
        prediction_value = prediction[i]
        if target_value == prediction_value:
            true_count[reverse_target_dictionary[target_value]] += 1
        else:
            false_count[reverse_target_dictionary[target_value]] += 1

    # calculate per-class accuracy
    for key in accuracy:
        accuracy[key] = true_count[key] / (true_count[key] + false_count[key])
    print(accuracy)
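# --- Hedged, self-contained sketch (not part of the original code): the same
# idea as test() above - fit GaussianNB on a one-dimensional projection and
# report per-class accuracy - shown on synthetic data so it runs without FE's
# CSV helpers. The class labels and distributions below are made up. ---
import numpy as np
from sklearn.naive_bayes import GaussianNB

rng = np.random.RandomState(0)
projection = np.concatenate([rng.normal(0.0, 1.0, 50), rng.normal(3.0, 1.0, 50)])
target = np.array([0] * 50 + [1] * 50)
projection_data = projection.reshape(-1, 1)
prediction = GaussianNB().fit(projection_data, target).predict(projection_data)
for cls in (0, 1):
    mask = target == cls
    print(cls, (prediction[mask] == cls).mean())  # per-class accuracy, as in test()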
from sklearn.tree import DecisionTreeClassifier

# Assumption: FE is the project's feature-extraction module,
# e.g. `import Feature_Extractor as FE`.


def extract_feature(csv_file_name, label=None):
    records = FE.extract_csv(csv_file_name, delimiter=',')
    records = FE.shuffle_record(records)

    # count the distinct class labels (last column, header row skipped)
    groups = []
    for record in records[1:]:
        if record[-1] not in groups:
            groups.append(record[-1])
    group_count = len(groups)

    # define classifier
    classifier = DecisionTreeClassifier(max_depth=group_count - 1, random_state=0)

    # define extractors
    params = {'max_epoch': 2, 'population_size': 50, 'mutation_rate': 0.25,
              'new_rate': 0.5, 'elitism_rate': 0.05, 'crossover_rate': 0.2,
              'stopping_value': 1.0}
    extractors = [
        # {'class': FE.GA_Select_Feature, 'label': 'GA Select Feature', 'color': 'red', 'params': params},
        # {'class': FE.GE_Global_Separability_Fitness, 'label': 'GE Global', 'color': 'blue', 'params': params},
        # {'class': FE.GE_Multi_Accuration_Fitness, 'label': 'GE Multi', 'color': 'cyan', 'params': params},
        {'class': FE.GE_Tatami_Multi_Accuration_Fitness, 'label': 'GE Tatami Multi', 'color': 'magenta', 'params': params},
        # {'class': FE.GE_Gravalis, 'label': 'GE Gravalis', 'color': 'green', 'params': params},
    ]

    # get label: default to the file name without its extension
    if label is None:
        file_name_partials = csv_file_name.split('.')
        if len(file_name_partials) > 1:
            label = '.'.join(file_name_partials[0:len(file_name_partials) - 1])
        else:
            label = csv_file_name

    # extract feature
    fold_count = 1
    FE.extract_feature(records, label + ' (whole)', fold_count, extractors, classifier)
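# Hypothetical invocation (assumed file name; assumes the CSV's last column
# holds the class label and the first row holds headers, matching how
# extract_feature() counts groups above):
#
#   extract_feature('dataset.csv')                  # label defaults to 'dataset'
#   extract_feature('dataset.csv', label='my run')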
import numpy as np

# Assumption: read_data, Feature_Extractor, WINDOW and WINDOW_SHIFT are defined
# elsewhere in this module; WINDOW and WINDOW_SHIFT are the window length and
# hop size in seconds.


def extract_features_from_file(features, filename):
    """Extract data of specific 'feature' from 'filename'."""
    data = read_data(filename)
    time_series = data['data']
    freq = data['sampling_frequency']
    windowed_data = {
        'data': None,
        'sampling_frequency': freq,
        'sequence': data['sequence'],
        'channels': data['channels']
    }
    T = 0
    freq_floor = np.floor(freq)
    num_windows = int((data['data_length_sec'] - WINDOW) / WINDOW_SHIFT) + 1
    res = {}
    for i in range(num_windows):
        # slice the current window out of the multichannel time series
        start, end = int(T * freq_floor), int((T + WINDOW) * freq_floor)
        windowed_data['data'] = time_series[:, start:end]
        fe = Feature_Extractor(windowed_data)
        T += WINDOW_SHIFT
        for feature in features:
            # dispatch to the matching extract_<feature> method and stack one row per window
            method = getattr(fe, 'extract_' + feature)
            res[feature] = method() if feature not in res else np.vstack([res[feature], method()])
    return res
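# --- Windowing arithmetic sketch (not part of the original code; WINDOW,
# WINDOW_SHIFT, data_length_sec and freq below are made-up values, used only to
# illustrate how extract_features_from_file() slices overlapping windows). ---
import numpy as np

WINDOW = 60            # assumed window length in seconds
WINDOW_SHIFT = 30      # assumed hop between windows in seconds
data_length_sec = 600  # assumed recording length in seconds
freq = 400.0           # assumed sampling frequency in Hz

num_windows = int((data_length_sec - WINDOW) / WINDOW_SHIFT) + 1  # -> 19 windows
T = 0
for i in range(num_windows):
    start, end = int(T * np.floor(freq)), int((T + WINDOW) * np.floor(freq))
    # consecutive windows overlap by WINDOW - WINDOW_SHIFT seconds
    T += WINDOW_SHIFT
print(num_windows)  # 19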
def __init__(self):
    self.dist = Utils.dist()
    self.nltk_Tools = Utils.nltk_tools()
    self.pickler = Utils.pickler()
    self.tools = Utils.tools()
    self.weight = Utils.weight()
    self.dataset_tools = Utils.dataset_tools(self.dist, self.nltk_Tools,
                                             self.pickler, self.tools)
    self.extractor = Feature_Extractor.extractor(self.dist, self.nltk_Tools,
                                                 self.pickler, self.tools,
                                                 self.weight, "authors", "titles")
    # Load model for prediction
    self.model = self.pickler.loadPickle('ModelCFS.pickle')
    self.model_v2 = self.pickler.loadPickle('ModelCFS_v2.pickle')
def __init__(self):
    self.feature_extractor = Feature_Extractor.Feature_Extractor()
    # lbfgs-trained CRF; c1/c2 are the L1/L2 regularization coefficients
    self.crf = sklearn_crfsuite.CRF(algorithm='lbfgs',
                                    c1=0.05,
                                    c2=0.01,
                                    max_iterations=50,
                                    all_possible_transitions=True,
                                    min_freq=0)  # micro: 52.13
import torch
from torchvision import models

# Assumption: Image_Preprocessor, Base_VGG19_Features_Net, Triplet_Net and
# Feature_Extractor are project-local classes imported elsewhere in this script.

# configurations
pretrained_vgg = False
simplified = True
sample_per_texture = 5
data_sample_path = "dataset/texture_simplified_20" if simplified else "dataset/texture"
model_dict_path = "state_dict/features/(margin = 5) feature_extraction_dict_150_epochs_best_0-0357"

# Stage 1: Preprocess Images
image_preprocessor = Image_Preprocessor(sample_per_texture, data_sample_path)
image_preprocessor.validate_data_in_file()
preprocessed_images = image_preprocessor.read_and_process_images_in_directory()

# Stage 2: Feature Extraction
vgg19_model = models.vgg19(pretrained=True)
if pretrained_vgg:
    feature_extraction_manager = Feature_Extractor(vgg19_model, train=True)
    print("pretrained model used")
else:
    base_net = Base_VGG19_Features_Net(vgg19_model)
    model = Triplet_Net(base_net)
    model.load_state_dict(torch.load(model_dict_path))
    print("model path added")
    feature_extraction_manager = Feature_Extractor(model, train=False)
extracted_features = feature_extraction_manager.classify_features(preprocessed_images)

# Stage 3: Sanity check
feature_extraction_manager.sanity_check_for_same_class(5, extracted_features)
feature_extraction_manager.sanity_check_for_different_class(extracted_features, len(preprocessed_images))

# Stage 4: TSNE dimension reduction
import Feature_Extractor
import Utils
import sys

if __name__ == '__main__':
    nltk_Tools = Utils.nltk_tools()
    tools = Utils.tools()
    weight = Utils.weight()
    dist = Utils.dist()
    pickler = Utils.pickler()
    dataset_tools = Utils.dataset_tools(dist, nltk_Tools, pickler, tools)

    authors = pickler.loadPickle(pickler.pathAuthors)
    titles = pickler.loadPickle(pickler.pathTitles)
    run = Feature_Extractor.extractor(dist, nltk_Tools, pickler, tools, weight, authors, titles)

    raw = pickler.loadPickle(pickler.pathRaw)
    annotations = pickler.loadPickle(pickler.pathAnnotations)
    experiment = dataset_tools.fetchExperiment(raw)

    (forannotation, keys, X, targets) = dataset_tools.prepDataset(run, raw, experiment, annotations)
    pickler.dumpPickle(forannotation, "For_Annotation")
    pickler.dumpPickle(keys, "DatasetTBA_keys")
    pickler.dumpPickle(X, "DatasetTBA")
    pickler.dumpPickle(targets, "Targets")

    #(forannotation, keys, X) = dataset_tools.prepDatasetCFS(run, raw, experiment)
    #pickler.dumpPickle(forannotation, "For_AnnotationCFS")
    #pickler.dumpPickle(keys, "DatasetTBA_keysCFS")
    #pickler.dumpPickle(X, "DatasetTBACFS")
import Feature_Extractor
import Utils
import sys

if __name__ == '__main__':
    nltk_Tools = Utils.nltk_tools()
    tools = Utils.tools()
    weight = Utils.weight()
    dist = Utils.dist()
    pickler = Utils.pickler()
    dataset_tools = Utils.dataset_tools(dist, nltk_Tools, pickler, tools)

    authors = pickler.loadPickle(pickler.pathAuthors)
    titles = pickler.loadPickle(pickler.pathTitles)
    run = Feature_Extractor.extractor(dist, nltk_Tools, pickler, tools, weight, authors, titles)

    raw = pickler.loadPickle(pickler.pathRaw)
    annotations = pickler.loadPickle(pickler.pathAnnotations)
    experiment = dataset_tools.fetchExperiment(raw)
    experiment = experiment[0:200]  # limit this run to the first 200 experiments

    (forannotation, keys, X, targets) = dataset_tools.prepDataset(run, raw, experiment, annotations)
    pickler.dumpPickle(forannotation, "For_Annotation")
    pickler.dumpPickle(keys, "DatasetTBA_keys")
    pickler.dumpPickle(X, "DatasetTBA")
    pickler.dumpPickle(targets, "Targets")

    #(forannotation, keys, X) = dataset_tools.prepDatasetCFS(run, raw, experiment)
    #pickler.dumpPickle(forannotation, "For_AnnotationCFS")
#FEATURE-EXTRACTOR#
import Feature_Extractor
import json
from io import open

result = {}
urls = open('url.txt', 'r')
#urls.readlines()
jsonresponse = open('jsonresponse.txt', 'w')
count = 0
for linea in urls.readlines():
    # collect the WHOIS record for each URL (one URL per line)
    result.update({count: Feature_Extractor.main(linea.replace('\n', '')).whois})
    count += 1
jsonresponse.write(json.dumps(result))