Example #1
# GaussianNB comes from scikit-learn; FE is assumed to be this project's
# feature-extraction module, imported elsewhere in the original file.
from sklearn.naive_bayes import GaussianNB

def test(csv_file_name, new_feature):
    records = FE.extract_csv(csv_file_name, delimiter=',')
    records = FE.shuffle_record(records)
    # preprocess records
    data = []
    label_target = []
    num_target = []
    group_label = []
    group_count = {}
    target_dictionary = {}
    group_index = 0
    features = []
    for i in range(len(records)):
        record = records[i]
        if i==0:
            features = record[:-1]
        else:
            data.append(record[:-1])
            label_target.append(record[-1])
            if not (record[-1] in target_dictionary):
                target_dictionary[record[-1]] = group_index
                group_count[record[-1]] = 0
                group_label.append(record[-1])              
                group_index += 1
            group_count[record[-1]] += 1           
            num_target.append(target_dictionary[record[-1]])                
    group_label.sort()
    # calculate projection and prediction    
    projection = FE.get_projection(new_feature, features, data)
    projection_data = []
    for projection_value in projection:
        projection_data.append([projection_value])
    target = num_target
    classifier = GaussianNB()
    prediction = classifier.fit(projection_data, target).predict(projection_data)
    # initiate true_count, false_count and accuracy
    true_count = dict(target_dictionary)
    false_count = dict(target_dictionary)
    accuracy = dict(target_dictionary)
    # reverse target dictionary
    reverse_target_dictionary = {}
    for key in target_dictionary:
        val = target_dictionary[key]
        reverse_target_dictionary[val] = key
    for key in true_count:
        true_count[key] = 0.0
        false_count[key] = 0.0
        accuracy[key] = 0.0
    # calculate true_count and false_count
    for i in range(len(target)):
        target_value = target[i]
        prediction_value = prediction[i]
        if target_value == prediction_value:
            true_count[reverse_target_dictionary[target_value]] += 1
        else:
            false_count[reverse_target_dictionary[target_value]] += 1
    # calculate accuracy
    for key in accuracy:
        accuracy[key] = true_count[key]/(true_count[key]+false_count[key])
    print(accuracy)
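The per-class value printed above (correct predictions over all samples of that class) is what scikit-learn reports as per-class recall; a minimal, self-contained sketch with hypothetical labels:

from sklearn.metrics import recall_score

y_true = [0, 0, 1, 1, 2, 2]   # hypothetical numeric targets
y_pred = [0, 1, 1, 1, 2, 0]   # hypothetical predictions
per_class = recall_score(y_true, y_pred, average=None)   # one score per class, sorted by label
print(dict(zip(sorted(set(y_true)), per_class)))          # {0: 0.5, 1: 1.0, 2: 0.5}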
Example #2
# DecisionTreeClassifier comes from scikit-learn; FE is assumed to be this
# project's feature-extraction module, imported elsewhere in the original file.
from sklearn.tree import DecisionTreeClassifier

def extract_feature(csv_file_name, label=None):
    records = FE.extract_csv(csv_file_name, delimiter=',')
    records = FE.shuffle_record(records)
    groups = []
    for record in records[1:]:
        if record[-1] not in groups:
            groups.append(record[-1])
    group_count = len(groups)
    # define classifier    
    classifier = DecisionTreeClassifier(max_depth=group_count-1, random_state=0)
    # define extractors
    params = {'max_epoch':2,'population_size':50, 'mutation_rate':0.25, 'new_rate':0.5, 
              'elitism_rate':0.05, 'crossover_rate': 0.2, 'stopping_value':1.0}
    extractors = [
        #{'class': FE.GA_Select_Feature, 'label':'GA Select Feature', 'color':'red', 'params':params},        
        #{'class': FE.GE_Global_Separability_Fitness, 'label':'GE Global', 'color':'blue', 'params':params},
        #{'class': FE.GE_Multi_Accuration_Fitness, 'label':'GE Multi', 'color':'cyan', 'params':params},
        {'class': FE.GE_Tatami_Multi_Accuration_Fitness, 'label':'GE Tatami Multi', 'color':'magenta', 'params':params},
        #{'class': FE.GE_Gravalis, 'label':'GE Gravalis', 'color':'green','params':params},
    ]
    # get label
    if label is None:
        file_name_partials = csv_file_name.split('.')
        if(len(file_name_partials)>1):
            label = '.'.join(file_name_partials[0:len(file_name_partials)-1])
        else:
            label = csv_file_name
    # extract feature
    fold_count = 1
    FE.extract_feature(records, label+' (whole)', fold_count, extractors, classifier)
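A hypothetical invocation of extract_feature; the CSV file name below is an assumption:

extract_feature('iris.csv')                            # label falls back to the file name without its extension: 'iris'
extract_feature('iris.csv', label='iris (GE Tatami)')  # explicit label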
Example #3
# numpy is required here; read_data, Feature_Extractor, WINDOW and
# WINDOW_SHIFT are assumed to be defined elsewhere in the original module.
import numpy as np

def extract_features_from_file(features, filename):
    """Extract each feature in 'features' from the data in 'filename'."""
    data = read_data(filename)
    time_series = data['data']
    freq = data['sampling_frequency']
    windowed_data = {
        'data': None,
        'sampling_frequency': freq,
        'sequence': data['sequence'],
        'channels': data['channels']
    }
    T = 0
    freq_floor = np.floor(freq)
    num_windows = int((data['data_length_sec'] - WINDOW) / WINDOW_SHIFT) + 1
    res = {}
    for i in range(num_windows):
        start, end = int(T * freq_floor), int((T + WINDOW) * freq_floor)
        windowed_data['data'] = time_series[:, start:end]
        fe = Feature_Extractor(windowed_data)
        T += WINDOW_SHIFT
        for feature in features:
            method = getattr(fe, 'extract_' + feature)
            if feature not in res:
                res[feature] = method()
            else:
                res[feature] = np.vstack([res[feature], method()])
    return res
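The window count above follows the usual sliding-window formula, floor((length - WINDOW) / WINDOW_SHIFT) + 1; a quick check with hypothetical values for the module-level constants:

WINDOW, WINDOW_SHIFT = 30, 15    # hypothetical values, in seconds
data_length_sec = 600
num_windows = int((data_length_sec - WINDOW) / WINDOW_SHIFT) + 1
print(num_windows)               # 39 windows: [0, 30), [15, 45), ..., [570, 600)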
Example #4
 def __init__(self):
   self.dist = Utils.dist()
   self.nltk_Tools = Utils.nltk_tools()
   self.pickler = Utils.pickler()
   self.tools = Utils.tools()
   self.weight = Utils.weight()
   self.dataset_tools = Utils.dataset_tools(self.dist, self.nltk_Tools, self.pickler, self.tools)
   self.extractor = Feature_Extractor.extractor(self.dist, self.nltk_Tools, self.pickler, self.tools, self.weight, "authors", "titles")
   # Load model for prediction
   self.model = self.pickler.loadPickle('ModelCFS.pickle')
   self.model_v2 = self.pickler.loadPickle('ModelCFS_v2.pickle')
Example #5
 def __init__(self):
     self.dist = Utils.dist()
     self.nltk_Tools = Utils.nltk_tools()
     self.pickler = Utils.pickler()
     self.tools = Utils.tools()
     self.weight = Utils.weight()
     self.dataset_tools = Utils.dataset_tools(self.dist, self.nltk_Tools,
                                              self.pickler, self.tools)
     self.extractor = Feature_Extractor.extractor(self.dist,
                                                  self.nltk_Tools,
                                                  self.pickler, self.tools,
                                                  self.weight, "authors",
                                                  "titles")
     # Load model for prediction
     self.model = self.pickler.loadPickle('ModelCFS.pickle')
     self.model_v2 = self.pickler.loadPickle('ModelCFS_v2.pickle')
Example #6
	def __init__(self):
		self.feature_extractor = Feature_Extractor.Feature_Extractor()
		self.crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.05, c2=0.01, max_iterations=50, all_possible_transitions=True, min_freq=0)  # micro: 52.13
Example #7
# Assumed imports for this snippet; Image_Preprocessor, Base_VGG19_Features_Net,
# Triplet_Net and Feature_Extractor come from the project's own modules.
import torch
from torchvision import models

# configurations
pretrained_vgg = False
simplified = True
sample_per_texture = 5
data_sample_path = "dataset/texture_simplified_20" if simplified else "dataset/texture"
model_dict_path = "state_dict/features/(margin = 5) feature_extraction_dict_150_epochs_best_0-0357"

# Stage 1: Preprocess Images
image_preprocessor = Image_Preprocessor(sample_per_texture, data_sample_path)
image_preprocessor.validate_data_in_file()
preprocessed_images = image_preprocessor.read_and_process_images_in_directory()

# Stage 2: Feature Extraction
vgg19_model = models.vgg19(pretrained=True)
if pretrained_vgg:
    feature_extraction_manager = Feature_Extractor(vgg19_model, train=True)
    print("pretrained model used")
else:
    base_net = Base_VGG19_Features_Net(vgg19_model)
    model = Triplet_Net(base_net)
    model.load_state_dict(torch.load(model_dict_path))
    print("model path added")
    feature_extraction_manager = Feature_Extractor(model, train=False)

extracted_features = feature_extraction_manager.classify_features(preprocessed_images)

# Stage 3: Sanity check
feature_extraction_manager.sanity_check_for_same_class(5, extracted_features)
feature_extraction_manager.sanity_check_for_different_class(extracted_features, len(preprocessed_images))

# Stage 4: TSNE dimension reduction
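The example stops at the Stage 4 heading; a minimal, self-contained sketch of what a t-SNE reduction of the extracted features could look like, using scikit-learn's TSNE on random stand-in vectors (the array shape and parameters are assumptions):

import numpy as np
from sklearn.manifold import TSNE

features_matrix = np.random.rand(100, 512).astype(np.float32)   # stand-in for extracted_features
embedded = TSNE(n_components=2, perplexity=30.0, random_state=0).fit_transform(features_matrix)
print(embedded.shape)                                            # (100, 2)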
Example #8
import Feature_Extractor
import Utils
import sys

if __name__ == '__main__':
  nltk_Tools = Utils.nltk_tools()
  tools = Utils.tools()
  weight = Utils.weight()
  dist = Utils.dist()
  pickler = Utils.pickler()
  dataset_tools = Utils.dataset_tools(dist, nltk_Tools, pickler, tools)

  authors = pickler.loadPickle(pickler.pathAuthors)
  titles = pickler.loadPickle(pickler.pathTitles)

  run = Feature_Extractor.extractor(dist, nltk_Tools, pickler, tools, weight, authors, titles)
  raw = pickler.loadPickle(pickler.pathRaw)
  annotations = pickler.loadPickle(pickler.pathAnnotations)
  experiment = dataset_tools.fetchExperiment(raw)

  (forannotation, keys, X, targets) = dataset_tools.prepDataset(run, raw, experiment, annotations)
  pickler.dumpPickle(forannotation, "For_Annotation")
  pickler.dumpPickle(keys, "DatasetTBA_keys")
  pickler.dumpPickle(X, "DatasetTBA")
  pickler.dumpPickle(targets, "Targets")

  #(forannotation, keys, X) = dataset_tools.prepDatasetCFS(run, raw, experiment)
  #pickler.dumpPickle(forannotation, "For_AnnotationCFS")
  #pickler.dumpPickle(keys, "DatasetTBA_keysCFS")
  #pickler.dumpPickle(X, "DatasetTBACFS")
Example #9
import Feature_Extractor
import Utils
import sys

if __name__ == '__main__':
    nltk_Tools = Utils.nltk_tools()
    tools = Utils.tools()
    weight = Utils.weight()
    dist = Utils.dist()
    pickler = Utils.pickler()
    dataset_tools = Utils.dataset_tools(dist, nltk_Tools, pickler, tools)

    authors = pickler.loadPickle(pickler.pathAuthors)
    titles = pickler.loadPickle(pickler.pathTitles)

    run = Feature_Extractor.extractor(dist, nltk_Tools, pickler, tools, weight,
                                      authors, titles)
    raw = pickler.loadPickle(pickler.pathRaw)
    annotations = pickler.loadPickle(pickler.pathAnnotations)
    experiment = dataset_tools.fetchExperiment(raw)

    experiment = experiment[0:200]

    (forannotation, keys, X,
     targets) = dataset_tools.prepDataset(run, raw, experiment, annotations)
    pickler.dumpPickle(forannotation, "For_Annotation")
    pickler.dumpPickle(keys, "DatasetTBA_keys")
    pickler.dumpPickle(X, "DatasetTBA")
    pickler.dumpPickle(targets, "Targets")

    #(forannotation, keys, X) = dataset_tools.prepDatasetCFS(run, raw, experiment)
    #pickler.dumpPickle(forannotation, "For_AnnotationCFS")
Example #10
#FEATURE-EXTRACTOR#
import Feature_Extractor
import json
from io import open

# Collect the whois features returned by Feature_Extractor.main for each URL
# listed in url.txt and dump them all as a single JSON object.
result = {}
count = 0
with open('url.txt', 'r') as urls:
    for linea in urls.readlines():
        result.update(
            {count: Feature_Extractor.main(linea.replace('\n', '')).whois})
        count += 1
with open('jsonresponse.txt', 'w') as jsonresponse:
    jsonresponse.write(json.dumps(result))