Example #1
import json

import pandas as pd
from keras.models import load_model  # assuming a Keras model saved as model.h5

# CoreNLP and extract_features are helpers from the surrounding project.
def predict(sentence: "Sentence string to predict output classes for"):

  with open('features.json', 'r') as f:
    feature_names = json.loads(f.read())

  with open('labels.json', 'r') as f2:
    labels = json.loads(f2.read())

  core_nlp  = CoreNLP()
  tokens    = core_nlp.tokenize_sentence(sentence)
  features  = extract_features(tokens)
  
  X = pd.DataFrame(features).fillna(0) * 1              # booleans -> 0/1 ints
  X = pd.get_dummies(X)                                 # one-hot encode categoricals
  X = pd.DataFrame(X, columns=feature_names).fillna(0)  # align columns with the training schema
  
  model  = load_model('model.h5')
  
  pred   = model.predict(X)
  result = pd.DataFrame(pred, columns=labels)
  result = result.round(2)

  result['word'] = [token['token'] for token in tokens]
  result['vote'] = result[labels].idxmax(axis=1)
  print(result)
  return result
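
A minimal usage sketch, assuming the helper modules above are importable and that features.json, labels.json and model.h5 were produced by the matching train() step (the sentence is illustrative):

result = predict("The quick brown fox jumps over the lazy dog.")
print(result['vote'].tolist())  # predicted class per token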
Example #2
import json

import pandas as pd

# CoreNLP, extract_features and train_model are helpers from the surrounding project.
def train():
  core_nlp = CoreNLP()
  
  df = pd.read_table('data/train.txt', delimiter=' ', names=['word', 'label'])

  sentences = " ".join(df['word']).replace(' ,', ',').replace(' .', '.').split('. ')
  sentences = [s.strip().replace('.', '') + '.' for s in sentences]
  
  features = []
  for s in sentences:
    tokens = core_nlp.tokenize_sentence(s)
    features.append(extract_features(tokens))

  X = pd.concat([pd.DataFrame(f) for f in features], sort=False)
  X = X.fillna(0) * 1 # convert booleans to 0/1 ints before one_hot_enc

  X = pd.get_dummies(X)            # one_hot_enc
  Y = pd.get_dummies(df['label'])  # aka to_categorical; assumes one feature row per word in df

  model = train_model(X, Y)
  
  model.save('model.h5')

  with open('labels.json', 'w') as f1:    # Labels used
    json.dump(Y.columns.tolist(), f1)
  
  with open('features.json', 'w') as f2:  # Features used
    json.dump(X.columns.tolist(), f2)

  return model
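
A minimal usage sketch, assuming data/train.txt holds space-separated word/label pairs as read above; the files written here feed predict() in Example #1:

model = train()  # writes model.h5, labels.json and features.json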
Example #3
def init_features(info, f_extr_params):

    # Tracks without tags cannot be labelled, so they are skipped.
    exceptions = []
    for track in info:
        if not track['tags']:
            exceptions.append(track['file_name'])
        else:
            # Strip the 4-character extension (e.g. '.mp3') and point at the features path.
            track['features'] = f_extr_params['path_to'] + track['file_name'][:-4]

    fe.extract_features(path_from=f_extr_params['path_from'],
                        path_to=f_extr_params['path_to'],
                        tmp_path_for_wavs=f_extr_params['tmp_path_for_wavs'],
                        exceptions=exceptions,
                        separator=f_extr_params['separator'],
                        methode=f_extr_params['methode'],
                        linear=f_extr_params['linear'])

    return info
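
The keys read from f_extr_params above imply a configuration dict shaped roughly like this; the values are illustrative assumptions, not taken from the source project:

f_extr_params = {
    'path_from': 'data/mp3/',          # source audio
    'path_to': 'data/features/',       # where extracted features are written
    'tmp_path_for_wavs': 'tmp/wavs/',  # scratch space for WAV conversion
    'separator': ';',
    'methode': 'mfcc',                 # spelling matches fe.extract_features' keyword
    'linear': True,
}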
Example #4
import os
import pickle
import uuid

from django.http import JsonResponse
from pydub import AudioSegment

# extract_features is a helper from the surrounding project.
def feature_extraction(request):
    filename = str(uuid.uuid1())
    original_location = "Audio_Files/Uploaded_MP3/" + filename
    with open(original_location, 'wb') as f:
        f.write(request.body)
    sound = AudioSegment.from_file(original_location)
    location = "Audio_Files/WAV/" + filename + ".wav"
    sound.export(location, format="wav")
    if os.path.exists(original_location):
        os.remove(original_location)
    features = extract_features(location).reshape(1, -1)
    if os.path.exists(location):
        os.remove(location)
    with open("Voice_Disorder_Model.sav", 'rb') as model_file:
        Voice_Disorder_Model = pickle.load(model_file)
    ans = Voice_Disorder_Model.predict(features)
    print(int(ans[0]))
    data = {'Disorder': int(ans[0])}
    return JsonResponse(data)
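
A client-side sketch for exercising this view with the requests library; the URL path is an assumption, and the view only needs the raw MP3 bytes in the request body:

import requests

with open('sample.mp3', 'rb') as f:
    resp = requests.post('http://localhost:8000/feature_extraction/', data=f.read())
print(resp.json())  # e.g. {'Disorder': 0}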
Example #5
import numpy as np

# Method excerpted from its class; fex is the project's feature-extraction module,
# and self.__get_data_array() supplies the raw data.
def get_features(self, windows_size: int, with_previous_class_feature: bool = False) -> np.ndarray:
    return fex.extract_features(self.__get_data_array(), windows_size,
                                with_previous_class_feature=with_previous_class_feature)[0]
Example #6
        train_data, dev_data, test_data, label_dictionary = data_fetching.fetch_data(
            S_DATASET)

        # PRE-PROCESSING
        print('Data preprocessing...')
        train_data['preprocessed'] = tweet_preprocessing.preprocess_data(
            train_data['content'], 'main')
        dev_data['preprocessed'] = tweet_preprocessing.preprocess_data(
            dev_data['content'], 'main')
        if B_TEST_PHASE:
            test_data['preprocessed'] = tweet_preprocessing.preprocess_data(
                test_data['content'], 'main')

        # FEATURE EXTRACTION
        print('Feature extraction...')
        train_features = feature_extraction.extract_features(
            train_data, train_data)
        dev_features = feature_extraction.extract_features(
            dev_data, train_data)
        test_features = feature_extraction.extract_features(
            test_data, train_data)

        train_length = len(train_data)
        test_length = len(test_data)

        if B_TWEET_LENGTH_ANALYSIS:
            utils.print_separator('Tweet length analysis')
            print('Maximum lengths (words):')
            print('     train: {}'.format(
                train_features['tweet_length'].max()))
            print('     test: {}'.format(test_features['tweet_length'].max()))
            print('Average lengths (words):')
            print('     train: {}'.format(
                train_features['tweet_length'].mean()))
            print('     test: {}'.format(
                test_features['tweet_length'].mean()))
Example #7
def main(s3_bucket_name, s3_object_key):
    image = get_image(s3_bucket_name, s3_object_key)
    image_features = extract_features(image)
    print(image_features)
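
get_image is not shown in this snippet; a minimal sketch of what it might look like with boto3, assuming extract_features accepts a PIL image:

import io

import boto3
from PIL import Image

def get_image(s3_bucket_name, s3_object_key):
    # Fetch the object's bytes from S3 and decode them into a PIL image.
    obj = boto3.client('s3').get_object(Bucket=s3_bucket_name, Key=s3_object_key)
    return Image.open(io.BytesIO(obj['Body'].read()))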